This article collects typical usage examples of the Python method mediawords.db.DatabaseHandler.create. If you have been wondering what DatabaseHandler.create does, how to use it, or where to find working samples, the curated examples below should help. You can also explore further usage examples of the containing class, mediawords.db.DatabaseHandler.
The following 11 code examples of DatabaseHandler.create are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
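Before the examples, a minimal sketch of the calling convention, distilled from the listings below. The table name, column values, and the use of connect_to_db() to obtain a handle are illustrative assumptions, not taken from any example:

import mediawords.db

# Hedged sketch: create() inserts `insert_hash` into `table` and returns the
# inserted row as a dict, including the generated primary key (the examples
# below rely on this, e.g. story['stories_id'] after db.create('stories', ...)).
db = mediawords.db.connect_to_db()
row = db.create(table='media', insert_hash={
    'name': 'Example Medium',            # illustrative values
    'url': 'http://media.example.com/',
})
print(row['media_id'])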
Example 1: _create_child_download_for_story
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def _create_child_download_for_story(db: DatabaseHandler, story: dict, parent_download: dict) -> None:
    """Create a pending download for the story's URL."""
    story = decode_object_from_bytes_if_needed(story)
    parent_download = decode_object_from_bytes_if_needed(parent_download)

    download = {
        'feeds_id': parent_download['feeds_id'],
        'stories_id': story['stories_id'],
        'parent': parent_download['downloads_id'],
        'url': story['url'],
        'host': get_url_host(story['url']),
        'type': 'content',
        'sequence': 1,
        'state': 'pending',
        'priority': parent_download['priority'],
        'extracted': False,
    }

    content_delay = db.query("""
        SELECT content_delay
        FROM media
        WHERE media_id = %(media_id)s
    """, {'media_id': story['media_id']}).flat()[0]
    if content_delay:
        # Delay the download of content by this many hours. This is useful for sources that are likely to
        # significantly change content in the hours after it is first published.
        now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
        download_at_timestamp = now + (content_delay * 60 * 60)
        download['download_time'] = get_sql_date_from_epoch(download_at_timestamp)

    db.create(table='downloads', insert_hash=download)
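To sanity-check the delay arithmetic above with made-up numbers: a media row with content_delay = 3 schedules the download three hours into the future:

import datetime

content_delay = 3  # hypothetical: media.content_delay of 3 hours
now = int(datetime.datetime(2018, 1, 1, 12, 0, tzinfo=datetime.timezone.utc).timestamp())
download_at_timestamp = now + (content_delay * 60 * 60)
assert download_at_timestamp - now == 10800  # 3 hours expressed in seconds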
Example 2: get_consistent_color
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def get_consistent_color(db: DatabaseHandler, item_set: str, item_id: str) -> str:
    """Return the same hex color (e.g. "ff0000") for the same set / ID combination every time this function is called."""
    item_set = decode_object_from_bytes_if_needed(item_set)
    item_id = decode_object_from_bytes_if_needed(item_id)

    # Always return grey for null or not typed values
    if item_id.lower() in {'null', 'not typed'}:
        return '999999'

    color = db.query("""SELECT color FROM color_sets WHERE color_set = %(item_set)s AND id = %(item_id)s""", {
        'item_set': item_set,
        'item_id': item_id,
    }).flat()
    if color is not None and len(color):
        if isinstance(color, list):
            color = color[0]
        return color

    set_colors = db.query("""SELECT color FROM color_sets WHERE color_set = %(item_set)s""", {
        'item_set': item_set,
    }).flat()
    if set_colors is not None:
        if not isinstance(set_colors, list):
            set_colors = [set_colors]

    existing_colors = set()
    if set_colors is not None:
        for color in set_colors:
            existing_colors.add(color)

    # Use the hardcoded palette of 25 colors if possible
    new_color = None
    for color in __MC_COLORS:
        if color not in existing_colors:
            new_color = color
            break

    # Otherwise, just generate a random color
    if new_color is None:
        colors = analogous_color(color='0000ff', return_slices=256, split_slices=255)
        new_color = random.choice(colors)

    db.create(table='color_sets', insert_hash={
        'color_set': item_set,
        'id': item_id,
        'color': new_color,
    })

    return new_color
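A quick consistency check, assuming db is an open DatabaseHandler; the set/ID pair below is illustrative:

# The first call persists a color via db.create(); the second finds that
# color_sets row, so the mapping is stable across calls
first = get_consistent_color(db, item_set='media_type', item_id='blog')
second = get_consistent_color(db, item_set='media_type', item_id='blog')
assert first == second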
Example 3: add_story
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def add_story(db: DatabaseHandler, story: dict, feeds_id: int, skip_checking_if_new: bool = False) -> Optional[dict]:
    """If the story is new, add it to the database with the feed of the download as the story's feed.

    Returns the created story, or None if the story wasn't created.
    """
    story = decode_object_from_bytes_if_needed(story)
    if isinstance(feeds_id, bytes):
        feeds_id = decode_object_from_bytes_if_needed(feeds_id)
    feeds_id = int(feeds_id)
    if isinstance(skip_checking_if_new, bytes):
        skip_checking_if_new = decode_object_from_bytes_if_needed(skip_checking_if_new)
    skip_checking_if_new = bool(int(skip_checking_if_new))

    if db.in_transaction():
        raise McAddStoryException("add_story() can't be run from within a transaction.")

    db.begin()
    db.query("LOCK TABLE stories IN ROW EXCLUSIVE MODE")

    if not skip_checking_if_new:
        if not is_new(db=db, story=story):
            log.debug("Story '{}' is not new.".format(story['url']))
            db.commit()
            return None

    medium = db.find_by_id(table='media', object_id=story['media_id'])

    if story.get('full_text_rss', None) is None:
        story['full_text_rss'] = medium.get('full_text_rss', False) or False
        if len(story.get('description', '')) == 0:
            story['full_text_rss'] = False

    try:
        story = db.create(table='stories', insert_hash=story)
    except Exception as ex:
        db.rollback()

        # FIXME get rid of this, replace with native upsert on "stories_guid" unique constraint
        if 'unique constraint "stories_guid' in str(ex):
            log.warning(
                "Failed to add story for '{}' due to GUID conflict (guid = '{}')".format(story['url'], story['guid'])
            )
            return None
        else:
            raise McAddStoryException("Error adding story: {}\nStory: {}".format(str(ex), str(story)))

    db.find_or_create(
        table='feeds_stories_map',
        insert_hash={
            'stories_id': story['stories_id'],
            'feeds_id': feeds_id,
        }
    )

    db.commit()

    return story
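A hedged calling sketch: the field set below is inferred from the story columns used elsewhere on this page and may be incomplete, and all IDs/values are illustrative. Note that db must not already be inside a transaction, since add_story() manages its own:

story = add_story(db=db, story={
    'media_id': 1,
    'url': 'http://media.example.com/story',
    'guid': 'http://media.example.com/story',
    'title': 'Example story',
    'description': '',
    'publish_date': '2018-01-01 00:00:00',
    'collect_date': '2018-01-01 00:00:00',
}, feeds_id=1)
if story is None:
    print('story already existed')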
Example 4: extract_links_for_topic_story
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def extract_links_for_topic_story(db: DatabaseHandler, story: dict, topic: dict) -> None:
    """
    Extract links from a story and insert them into the topic_links table for the given topic.

    After the story is processed, set topic_stories.link_mined to true for that story. Calls get_links_from_story
    on each story.

    Almost all errors are caught by this function and saved in topic_stories.link_mine_error. In the case of an
    error, topic_stories.link_mined is also set to true.

    Arguments:
    db - db handle
    story - story dict from db
    topic - topic dict from db

    Returns:
    None
    """
    try:
        log.info("mining %s %s for topic %s .." % (story['title'], story['url'], topic['name']))

        links = get_links_from_story(db, story)

        for link in links:
            if mediawords.tm.domains.skip_self_linked_domain_url(db, topic['topics_id'], story['url'], link):
                log.info("skipping self linked domain url...")
                continue

            topic_link = {
                'topics_id': topic['topics_id'],
                'stories_id': story['stories_id'],
                'url': link
            }

            db.create('topic_links', topic_link)
            mediawords.tm.domains.increment_domain_links(db, topic_link)

        link_mine_error = ''
    except Exception:
        link_mine_error = traceback.format_exc()

    db.query(
        """
        update topic_stories set link_mined = 't', link_mine_error = %(c)s
        where stories_id = %(a)s and topics_id = %(b)s
        """,
        {'a': story['stories_id'], 'b': topic['topics_id'], 'c': link_mine_error})
Example 5: merge_foreign_rss_stories
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def merge_foreign_rss_stories(db: DatabaseHandler, topic: dict) -> None:
    """Move all topic stories with a foreign_rss_links medium from topic_stories back to topic_seed_urls."""
    topic = decode_object_from_bytes_if_needed(topic)

    stories = db.query(
        """
        select s.*
        from stories s, topic_stories ts, media m
        where
            s.stories_id = ts.stories_id and
            s.media_id = m.media_id and
            m.foreign_rss_links = true and
            ts.topics_id = %(a)s and
            not ts.valid_foreign_rss_story
        """,
        {'a': topic['topics_id']}).hashes()

    for story in stories:
        download = db.query(
            "select * from downloads where stories_id = %(a)s order by downloads_id limit 1",
            {'a': story['stories_id']}).hash()

        content = ''
        try:
            content = mediawords.dbi.downloads.fetch_content(db, download)
        except Exception:
            pass

        db.begin()
        db.create('topic_seed_urls', {
            'url': story['url'],
            'topics_id': topic['topics_id'],
            'source': 'merge_foreign_rss_stories',
            'content': content
        })

        db.query(
            "delete from topic_stories where stories_id = %(a)s and topics_id = %(b)s",
            {'a': story['stories_id'], 'b': topic['topics_id']})
        db.commit()
Example 6: _add_topic_tweet_single_day
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def _add_topic_tweet_single_day(
        db: DatabaseHandler,
        topic: dict,
        day: datetime.datetime,
        ch_class: typing.Type[AbstractCrimsonHexagon]) -> dict:
    """
    Add a row to topic_tweet_days if it does not already exist. Fetch data for the new row from CH.

    Arguments:
    db - database handle
    topic - topic dict
    day - date to fetch, e.g. '2017-12-30'
    ch_class - AbstractCrimsonHexagon class

    Return:
    the created topic_tweet_days row as a dict, with the fetched 'ch_posts' attached
    """
    # the perl-python layer was segfaulting until I added the str() around day below -hal
    topic_tweet_day = db.query(
        "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
        {'a': topic['topics_id'], 'b': str(day)}).hash()

    if topic_tweet_day is not None and topic_tweet_day['tweets_fetched']:
        raise McFetchTopicTweetDateFetchedException("tweets already fetched for day " + str(day))

    # if we have a ttd but had not finished fetching tweets, delete it and start over
    if topic_tweet_day is not None:
        db.delete_by_id('topic_tweet_days', topic_tweet_day['topic_tweet_days_id'])

    ch_posts = ch_class.fetch_posts(topic['ch_monitor_id'], day)

    tweet_count = ch_posts['totalPostsAvailable']
    num_ch_tweets = len(ch_posts['posts'])

    topic_tweet_day = db.create(
        'topic_tweet_days',
        {
            'topics_id': topic['topics_id'],
            'day': day,
            'tweet_count': tweet_count,
            'num_ch_tweets': num_ch_tweets,
            'tweets_fetched': False
        })

    topic_tweet_day['ch_posts'] = ch_posts

    return topic_tweet_day
Example 7: create_download_for_new_story
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def create_download_for_new_story(db: DatabaseHandler, story: dict, feed: dict) -> dict:
    """Create and return a download object in the database for the new story."""
    download = {
        'feeds_id': feed['feeds_id'],
        'stories_id': story['stories_id'],
        'url': story['url'],
        'host': mediawords.util.url.get_url_host(story['url']),
        'type': 'content',
        'sequence': 1,
        'state': 'success',
        'path': 'content:pending',
        'priority': 1,
        'extracted': 'f'
    }

    download = db.create('downloads', download)

    return download
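A hedged usage note: since the download is created with state 'success' and path 'content:pending', the caller is expected to store the actual content afterwards, as Example 10 below does via store_content. The content value here is illustrative:

# Assumes `db`, `story` and `feed` already exist
download = create_download_for_new_story(db, story, feed)
content = '<html>...</html>'  # illustrative content
mediawords.dbi.downloads.store_content(db, download, content)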
Example 8: add_user
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def add_user(db: DatabaseHandler, new_user: NewUser) -> None:
    """Add a new user."""
    if not new_user:
        raise McAuthRegisterException("New user is undefined.")

    # Check if the user already exists
    user_exists = db.query("""
        SELECT auth_users_id
        FROM auth_users
        WHERE email = %(email)s
        LIMIT 1
    """, {'email': new_user.email()}).hash()
    if user_exists is not None and 'auth_users_id' in user_exists:
        raise McAuthRegisterException("User with email '%s' already exists." % new_user.email())

    # Hash + validate the password
    try:
        password_hash = generate_secure_hash(password=new_user.password())
        if not password_hash:
            raise McAuthRegisterException("Password hash is empty.")
    except Exception as ex:
        log.error("Unable to hash a new password: {}".format(ex))
        raise McAuthRegisterException('Unable to hash a new password.')

    db.begin()

    # Create the user
    db.create(
        table='auth_users',
        insert_hash={
            'email': new_user.email(),
            'password_hash': password_hash,
            'full_name': new_user.full_name(),
            'notes': new_user.notes(),
            'active': bool(int(new_user.active())),
        }
    )

    # Fetch the user's ID
    try:
        user = user_info(db=db, email=new_user.email())
    except Exception as ex:
        db.rollback()
        raise McAuthRegisterException("I've attempted to create the user but it doesn't exist: %s" % str(ex))

    # Create roles
    try:
        for auth_roles_id in new_user.role_ids():
            db.create(table='auth_users_roles_map', insert_hash={
                'auth_users_id': user.user_id(),
                'auth_roles_id': auth_roles_id,
            })
    except Exception as ex:
        raise McAuthRegisterException("Unable to create roles: %s" % str(ex))

    # Update limits (if they're defined)
    if new_user.weekly_requests_limit() is not None:
        db.query("""
            UPDATE auth_user_limits
            SET weekly_requests_limit = %(weekly_requests_limit)s
            WHERE auth_users_id = %(auth_users_id)s
        """, {
            'auth_users_id': user.user_id(),
            'weekly_requests_limit': new_user.weekly_requests_limit(),
        })

    if new_user.weekly_requested_items_limit() is not None:
        db.query("""
            UPDATE auth_user_limits
            SET weekly_requested_items_limit = %(weekly_requested_items_limit)s
            WHERE auth_users_id = %(auth_users_id)s
        """, {
            'auth_users_id': user.user_id(),
            'weekly_requested_items_limit': new_user.weekly_requested_items_limit(),
        })

    # Subscribe to newsletter
    if new_user.subscribe_to_newsletter():
        db.create(table='auth_users_subscribe_to_newsletter', insert_hash={'auth_users_id': user.user_id()})

    if not new_user.active():
        send_user_activation_token(
            db=db,
            email=new_user.email(),
            activation_link=new_user.activation_url(),
            subscribe_to_newsletter=new_user.subscribe_to_newsletter(),
        )

    db.commit()
Example 9: copy_story_to_new_medium
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def copy_story_to_new_medium(db: DatabaseHandler, topic: dict, old_story: dict, new_medium: dict) -> dict:
    """Copy story to new medium.

    Copy the given story, assigning the new media_id and copying over the download, extracted text, and so on.
    Return the new story.
    """
    story = {
        'url': old_story['url'],
        'media_id': new_medium['media_id'],
        'guid': old_story['guid'],
        'publish_date': old_story['publish_date'],
        'collect_date': mediawords.util.sql.sql_now(),
        'description': old_story['description'],
        'title': old_story['title']
    }
    story = db.create('stories', story)
    add_to_topic_stories(db=db, story=story, topic=topic, valid_foreign_rss_story=True)

    db.query(
        """
        insert into stories_tags_map (stories_id, tags_id)
        select %(a)s, stm.tags_id from stories_tags_map stm where stm.stories_id = %(b)s
        """,
        {'a': story['stories_id'], 'b': old_story['stories_id']})

    feed = get_spider_feed(db, new_medium)
    db.create('feeds_stories_map', {'feeds_id': feed['feeds_id'], 'stories_id': story['stories_id']})

    old_download = db.query(
        "select * from downloads where stories_id = %(a)s order by downloads_id limit 1",
        {'a': old_story['stories_id']}).hash()

    download = create_download_for_new_story(db, story, feed)

    if old_download is not None:
        try:
            content = mediawords.dbi.downloads.fetch_content(db, old_download)
            download = mediawords.dbi.downloads.store_content(db, download, content)
        except (mediawords.dbi.downloads.McDBIDownloadsException,
                mediawords.key_value_store.amazon_s3.McAmazonS3StoreException):
            download_update = dict([(f, old_download[f]) for f in ['state', 'error_message', 'download_time']])
            db.update_by_id('downloads', download['downloads_id'], download_update)

        db.query(
            """
            insert into download_texts (downloads_id, download_text, download_text_length)
            select %(a)s, dt.download_text, dt.download_text_length
            from download_texts dt
            where dt.downloads_id = %(a)s
            """,
            {'a': download['downloads_id']})

    db.query(
        """
        insert into story_sentences (stories_id, sentence_number, sentence, media_id, publish_date, language)
        select %(a)s, sentence_number, sentence, media_id, publish_date, language
        from story_sentences
        where stories_id = %(b)s
        """,
        {'a': story['stories_id'], 'b': old_story['stories_id']})

    return story
Example 10: generate_story
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def generate_story(
        db: DatabaseHandler,
        url: str,
        content: str,
        title: str = None,
        publish_date: datetime.datetime = None,
        fallback_date: typing.Optional[datetime.datetime] = None) -> dict:
    """Add a new story to the database by guessing metadata using the given url and content.

    This function guesses the medium, feed, title, and date of the story from the url and content.

    If inserting the story results in a unique constraint error based on media_id and url, return
    the existing story instead.

    Arguments:
    db - db handle
    url - story url
    content - story content
    fallback_date - fall back to this date if the date guesser fails to find a date
    """
    if len(url) < 1:
        raise McTMStoriesException("url must not be an empty string")

    url = url[0:mediawords.dbi.stories.stories.MAX_URL_LENGTH]

    medium = mediawords.tm.media.guess_medium(db, url)
    feed = get_spider_feed(db, medium)
    spidered_tag = mediawords.tm.media.get_spidered_tag(db)

    if title is None:
        title = mediawords.util.parse_html.html_title(content, url, mediawords.dbi.stories.stories.MAX_TITLE_LENGTH)

    story = {
        'url': url,
        'guid': url,
        'media_id': medium['media_id'],
        'title': title,
        'description': ''
    }

    # postgres refuses to insert text values with the null character
    for field in ('url', 'guid', 'title'):
        story[field] = re2.sub('\x00', '', story[field])

    if publish_date is None:
        date_guess = guess_date(url, content)
        story['publish_date'] = date_guess.date if date_guess.found else fallback_date
        if story['publish_date'] is None:
            story['publish_date'] = datetime.datetime.now().isoformat()
    else:
        story['publish_date'] = publish_date

    try:
        story = db.create('stories', story)
    except mediawords.db.exceptions.handler.McUniqueConstraintException:
        return mediawords.tm.stories.get_story_match(db=db, url=story['url'])
    except Exception:
        raise McTMStoriesException("Error adding story: %s" % traceback.format_exc())

    db.query(
        "insert into stories_tags_map (stories_id, tags_id) values (%(a)s, %(b)s)",
        {'a': story['stories_id'], 'b': spidered_tag['tags_id']})

    if publish_date is None:
        assign_date_guess_tag(db, story, date_guess, fallback_date)

    log.debug("add story: %s; %s; %s; %d" % (story['title'], story['url'], story['publish_date'], story['stories_id']))

    db.create('feeds_stories_map', {'stories_id': story['stories_id'], 'feeds_id': feed['feeds_id']})

    download = create_download_for_new_story(db, story, feed)

    mediawords.dbi.downloads.store_content(db, download, content)

    _extract_story(db, story)

    return story
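A hedged call sketch; the URL and HTML are made-up values, and db is assumed to be an open handle:

story = generate_story(
    db=db,
    url='http://media.example.com/2018/01/some-story',
    content='<html><head><title>Some story</title></head><body>Body text.</body></html>',
)
# On a media_id/url unique-constraint conflict, the existing matching story is returned instead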
Example 11: login_with_email_password
# Required module import: from mediawords.db import DatabaseHandler [as alias]
# Or: from mediawords.db.DatabaseHandler import create [as alias]
def login_with_email_password(db: DatabaseHandler, email: str, password: str, ip_address: str = None) -> CurrentUser:
    """Log in with email and password; raise on unsuccessful login."""
    email = decode_object_from_bytes_if_needed(email)
    password = decode_object_from_bytes_if_needed(password)

    if not (email and password):
        raise McAuthLoginException("Email and password must be defined.")

    # Try-except block because we don't want to reveal the specific reason why the login has failed
    try:
        user = user_info(db=db, email=email)

        # Check if the user has tried to log in unsuccessfully before and is now trying again too fast
        if __user_is_trying_to_login_too_soon(db=db, email=email):
            raise McAuthLoginException(
                "User '%s' is trying to log in too soon after the last unsuccessful attempt." % email
            )

        if not password_hash_is_valid(password_hash=user.password_hash(), password=password):
            raise McAuthLoginException("Password for user '%s' is invalid." % email)

    except Exception as ex:
        log.info(
            "Login failed for %(email)s, will delay any successive login attempt for %(delay)d seconds: %(exc)s" % {
                'email': email,
                'delay': __POST_UNSUCCESSFUL_LOGIN_DELAY,
                'exc': str(ex),
            }
        )

        # Set the unsuccessful login timestamp
        # (TIMESTAMP 'now' returns "current transaction's start time", so using LOCALTIMESTAMP instead)
        db.query("""
            UPDATE auth_users
            SET last_unsuccessful_login_attempt = LOCALTIMESTAMP
            WHERE email = %(email)s
        """, {'email': email})

        # It might make sense to time.sleep() here for the duration of $POST_UNSUCCESSFUL_LOGIN_DELAY seconds to
        # prevent legitimate users from trying to log in too fast. However, when being actually brute-forced through
        # multiple HTTP connections, this approach might end up creating a lot of processes that would time.sleep()
        # and take up memory.
        #
        # So, let's return the error page ASAP and hope that a legitimate user won't be able to reenter his / her
        # password before the $POST_UNSUCCESSFUL_LOGIN_DELAY amount of seconds pass.

        # Don't give out a specific reason so that users can't find out which emails are registered
        raise McAuthLoginException("User '%s' was not found or password is incorrect." % email)

    if not user.active():
        raise McAuthLoginException("User with email '%s' is not active." % email)

    # Reset password reset token (if any)
    db.query("""
        UPDATE auth_users
        SET password_reset_token_hash = NULL
        WHERE email = %(email)s
          AND password_reset_token_hash IS NOT NULL
    """, {'email': email})

    if ip_address:
        if not user.api_key_for_ip_address(ip_address):
            db.create(
                table='auth_user_api_keys',
                insert_hash={
                    'auth_users_id': user.user_id(),
                    'ip_address': ip_address,
                })

            # Fetch the user again
            user = user_info(db=db, email=email)

            if not user.api_key_for_ip_address(ip_address):
                raise McAuthLoginException("Unable to create per-IP API key for IP %s" % ip_address)

    return user
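A hedged usage sketch; the credentials and IP are illustrative, and per the code above a failed attempt also delays the next one:

try:
    user = login_with_email_password(db=db, email='user@example.com', password='s3cret', ip_address='10.0.0.1')
    print("Logged in, auth_users_id = %d" % user.user_id())
except McAuthLoginException as ex:
    print("Login failed: %s" % ex)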