本文整理汇总了Python中article.Article类的典型用法代码示例。如果您正苦于以下问题：Python Article类的具体用法？Python Article怎么用？Python Article使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Article类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Generate questions for every sentence of an article and print the best.

    Expects exactly two command-line arguments after the script name: the
    article file name and the number of questions to report.  The file name
    is handed to ``clean_up``; the article text is then read from
    ``../temp/article.clean`` (presumably written by ``clean_up`` -- confirm).

    Each sentence is printed, simplified with ``simplify_sen`` and turned
    into questions with ``question``.  A failure while processing one
    sentence is reported as "failed" and does not stop the run.  Finally all
    collected questions are joined with newlines and ranked.

    NOTE(review): ``nquestion`` is forwarded to ``ranking.get_top_questions``
    as the raw string from ``sys.argv`` -- confirm the callee accepts that.
    NOTE(review): block nesting was reconstructed from a listing that had
    lost its indentation; verify the placement of ``print("")``.
    """
    _, file_name, nquestion = sys.argv
    clean_up(file_name)
    timer_log("reference resolution")
    questions = []
    # replaced
    with open("../temp/article.clean") as farticle:
        article = Article(farticle.read())
    for sent in article.sentences():
        print(sent)
        try:
            for simp_s in simplify_sen(sent):
                q_generated = question(simp_s)
                questions.extend(q_generated)
                for q in q_generated:
                    print(q)
                print("")
        except Exception:
            # Best effort per sentence.  ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt / SystemExit abort the run.
            print("failed")
            if debug:
                traceback.print_exc()
        timer_log("one sentence")
    print(ranking.get_top_questions('\n'.join(questions), nquestion))
    timer_log("ranking")
示例2: __init__
def __init__(self, app):
    """Serve the blog form, or store and render a submitted post.

    When ``app.request.GET`` is empty, only the form is written to
    ``app.response``.  Otherwise the submitted fields are copied into
    ``self.the_post`` together with today's date, formatted by
    ``Article.a``, passed with the global ``log_posts`` to
    ``BlogModel.add_post_list``, and the resulting page is written out.
    """
    bmodel = BlogModel()
    bview = BlogFeedView()
    form = Form()
    art = Article()

    if not app.request.GET:
        # Nothing was submitted: show the empty form only.
        app.response.write(bview.print_out_form(form.reg_form))
        return

    # Collect the submitted fields; a missing field raises, as before.
    self.the_post = {}
    for field in ("title", "author", "tags", "category", "article"):
        self.the_post[field] = app.request.GET[field]
    self.the_post["date"] = time.strftime("%m/%d/%Y")

    # Format the post as HTML and hand it over together with log_posts.
    rendered_post = art.a(self.the_post)
    bmodel.add_post_list(rendered_post, log_posts)

    # Write the complete document back to the browser.
    page = bview.form_success(bmodel.formator(log_posts), form.reg_form)
    app.response.write(page)
示例3: set_yahoo_articles
def set_yahoo_articles(self):
    """Collect ``Article`` objects from the Yahoo! Japan news listings.

    For each category, the base listing URL plus its ``&p=2`` page are
    loaded with ``pq``.  Every ``.list .ttl`` element is paired with a
    ``.list a`` element; the linked page is loaded, its ``.hbody`` text is
    used as content, and the resulting ``Article`` is appended to
    ``self.collection`` after ``get_info()`` has been called on it.
    """
    page = 3
    listings = [
        ('国内', 'http://news.yahoo.co.jp/list/?c=domestic'),
        ('国際', 'http://news.yahoo.co.jp/list/?c=world'),
        ('経済', 'http://news.yahoo.co.jp/list/?c=economy'),
        ('エンタメ', 'http://news.yahoo.co.jp/list/?c=entertainment'),
        ('スポーツ', 'http://news.yahoo.co.jp/list/?c=sports'),
        ('IT', 'http://news.yahoo.co.jp/list/?c=computer'),
        ('科学', 'http://news.yahoo.co.jp/list/?c=science'),
        ('地域', 'http://news.yahoo.co.jp/list/?c=local'),
    ]

    for category, first_page in listings:
        # First page as-is, following pages via the "&p=<n>" parameter.
        page_urls = [first_page]
        page_urls.extend(first_page + '&p=' + str(n) for n in range(2, page))

        for page_url in page_urls[:page - 1]:
            d = pq(page_url)
            headline_nodes = d('.list .ttl')
            link_nodes = d('.list a')
            for headline_node, link_node in zip(headline_nodes, link_nodes):
                article_url = 'http://news.yahoo.co.jp' + d(link_node).attr('href')
                headline = d(headline_node).text().encode('utf-8')
                body = pq(article_url)('.hbody').text().encode('utf-8')
                article = Article(article_url, category, headline, body)
                article.get_info()
                self.collection.append(article)
示例4: load_data
def load_data(self):
    """Load cycle data and page views for every article of the pattern.

    Refreshes the cycle data on ``self.mp`` and ``self.tp``, then builds an
    ``Article`` for each entry of ``self.pattern.arch.articles`` and fetches
    its views.  Articles with a non-empty ``views`` list are added to
    ``self.average``; they are also appended to ``self.articles`` unless
    they hold more than ``24 * len(self.pattern.relativeDays) - 1`` view
    entries, in which case their title is printed for manual inspection.

    Messages use single-argument ``print(...)`` calls so the output is the
    same on Python 2 and Python 3 (the original print statements are a
    syntax error on Python 3).

    NOTE(review): the listing this was recovered from had lost its
    indentation.  The ``else`` is assumed to belong to the length check,
    not to ``if art.views != []`` -- confirm against the original.
    """
    self.mp.get_cycles()
    self.mp.get_yearly_cycles()
    self.mp.get_monthly_cycles()
    self.tp.get_cycles()
    self.tp.get_yearly_cycles()
    self.tp.get_monthly_cycles()

    arch = self.pattern.arch
    max_views = 24 * len(self.pattern.relativeDays) - 1
    for title in arch.articles:
        try:
            art = Article(title, arch.articleTitleToURL[title],
                          arch.articles[title], self.pattern)
            try:
                art.get_views()
                if art.views != []:
                    self.average.add_art(art)
                    if len(art.views) <= max_views:
                        self.articles.append(art)
                    else:
                        # Two spaces: the old print statement inserted a
                        # separator after the trailing space of the text.
                        print("Look at page views for  %s" % (art.title,))
            except IndexError:
                print(art.link_title)
        except KeyError:
            # Deliberately skipped, as in the original: an article whose
            # lookup raises KeyError is ignored.
            pass

    self.average.calc_av_views()
    print("Number of articles for %s:  %s"
          % (self.pattern.title, self.average.num_added))
示例5: article_api
def article_api(version=1):
    """Return JSON information about an article selected by URL or id.

    The ``url`` and ``id`` parameters are read from the query string for
    GET requests and from the form data otherwise.  A URL takes priority
    over an id.  Every failure is reported as a JSON object with
    ``valid=False`` and a ``reason``.
    """
    if not (1 <= version <= 1):
        return better_jsonify(valid=False, reason="Unsupported version")

    params = request.args if request.method == "GET" else request.form
    url = params.get("url")
    uuid = params.get("id")

    # Normalize and bound the length of whatever was supplied
    if url:
        url = url.strip()[0:_MAX_URL_LENGTH]
    if uuid:
        uuid = uuid.strip()[0:_MAX_UUID_LENGTH]
    if url:
        # URL has priority, if both are specified
        uuid = None
    if not (url or uuid):
        return better_jsonify(valid=False, reason="No url or id specified in query")

    with SessionContext(commit=True) as session:
        if uuid:
            a = ArticleProxy.load_from_uuid(uuid, session)
        elif url.startswith(("http:", "https:")):
            a = ArticleProxy.load_from_url(url, session)
        else:
            a = None

        if a is None:
            return better_jsonify(valid=False, reason="Article not found")
        if a.html is None:
            return better_jsonify(valid=False, reason="Unable to fetch article")

        # Prepare the article for display
        a.prepare(session)
        register = a.create_register(session, all_names=True)

        # Fetch names of article topics, if any
        topic_rows = (
            session.query(ArticleTopic).filter(ArticleTopic.article_id == a.uuid).all()
        )
        topics = [
            dict(name=row.topic.name, id=row.topic.identifier) for row in topic_rows
        ]

    return better_jsonify(
        valid=True,
        url=a.url,
        id=a.uuid,
        heading=a.heading,
        author=a.author,
        ts=a.timestamp.isoformat()[0:19],
        num_sentences=a.num_sentences,
        num_parsed=a.num_parsed,
        ambiguity=a.ambiguity,
        register=register,
        topics=topics,
    )
示例6: create_article
def create_article(cls, parent, slug, site=None, title="Root", **kwargs):
    """Create a path under *parent* plus an article related to it.

    Falls back to ``Site.objects.get_current()`` when *site* is falsy.
    Extra keyword arguments are forwarded to ``ArticleRevision``.  Returns
    the newly created path object, not the article.
    """
    target_site = site or Site.objects.get_current()
    url_path = cls.objects.create(site=target_site, parent=parent, slug=slug)

    new_article = Article(title=title)
    first_revision = ArticleRevision(title=title, **kwargs)
    new_article.add_revision(first_revision, save=True)
    new_article.add_object_relation(url_path)
    return url_path
示例7: post
def post(self, ID=None):
    """Store the submitted blog entry as an ``Article`` and redirect to 'home'.

    The ``blogTitle`` and ``blogText`` request fields are passed through
    ``cgi.escape`` before being stored.  ``ID`` is accepted but not used.
    """
    fetch = self.request.get
    entry = Article(
        title=cgi.escape(fetch('blogTitle')),
        text=cgi.escape(fetch('blogText')),
    )
    entry.put()
    self.redirect_to('home')
示例8: check_for_dtd_error
def check_for_dtd_error(args):
    """Print the DTD error, if any, of the article archive named by *args*.

    Opens ``args.article_file.name`` read-only as an ``Article`` and runs
    its ``check_for_dtd_error()``.  A truthy result is printed, prefixed
    with ``error: DTD error:`` when ``args.format_ariespull`` is set.

    The article is closed in a ``finally`` block so it is released even if
    the check raises.  Output uses single-argument ``print(...)`` calls,
    which behave the same on Python 2 and Python 3.
    """
    a = Article(archive_file=args.article_file.name, read_only=True)
    try:
        error = a.check_for_dtd_error()
        if error:
            if args.format_ariespull:
                print("error: DTD error: %s" % error)
            else:
                print(error)
    finally:
        a.close()
示例9: add_feed
def add_feed(self, feed):
    """
    add_feed takes the URL or file path of a Feedzilla feed, cleans it up,
    and adds the articles to this Feed object's list.

    Each entry id is reduced to the integer after ``feedzilla.com:``.
    Entries whose id is not yet a key of ``self.articles`` are stored
    there with author, title, source link, Feedzilla story URL and a
    trimmed summary.
    """
    log.info("Retrieving feed.")
    f = feedparser.parse(feed)
    for item in f['entries']:
        a = Article()
        # Set ID as integer, without feedzilla at beginning
        a.id = item['id']
        a.id = re.sub(r'.*feedzilla\.com:(.*)', r'\1', a.id)
        a.id = int(a.id)
        if a.id not in self.articles:
            # Set source, author and title
            a.author = item['author']
            a.title = item['title']
            a.source = item['source']['links'][0]['href']
            a.trueSource = "http://news.feedzilla.com/en_us/stories/world-news/" + str(a.id)
            # Set summary, get rid of all the junk at the end: keep only
            # the text before the first blank line and the first "<".
            # partition() leaves the text intact when a marker is missing,
            # whereas slicing with find() == -1 dropped the last character.
            summary = item['summary']
            summary = summary.partition("\n\n")[0]
            summary = summary.partition("<")[0]
            a.summary = summary
            # Add the article if it doesn't already exist
            self.articles[a.id] = a
示例10: add_feed
def add_feed(self, feed):
    """Parse a Feedzilla feed and add its new articles to ``self.articles``.

    *feed* is passed unchanged to ``feedparser.parse``.  Each entry id is
    reduced to the integer after ``feedzilla.com:``.  Entries whose id is
    not yet a key of ``self.articles`` are stored there with author,
    title, source link, Feedzilla story URL and a trimmed summary.

    Progress is written to standard output as ``Adding feed => Done``.
    """
    import sys

    # Same text the old ``print "Adding feed =>",`` statement produced,
    # written in a form that also works on Python 3.
    sys.stdout.write("Adding feed => ")
    f = feedparser.parse(feed)
    for item in f['entries']:
        a = Article()
        # Set ID as integer, without feedzilla at beginning
        a.id = item['id']
        a.id = re.sub(r'.*feedzilla\.com:(.*)', r'\1', a.id)
        a.id = int(a.id)
        if a.id not in self.articles:
            # Set source, author and title
            a.author = item['author']
            a.title = item['title']
            a.source = item['source']['links'][0]['href']
            a.trueSource = "http://news.feedzilla.com/en_us/stories/world-news/" + str(a.id)
            # Set summary, get rid of all the junk at the end: keep only
            # the text before the first blank line and the first "<".
            # partition() leaves the text intact when a marker is missing,
            # whereas slicing with find() == -1 dropped the last character.
            summary = item['summary']
            summary = summary.partition("\n\n")[0]
            summary = summary.partition("<")[0]
            a.summary = summary
            # Add the article if it doesn't already exist
            self.articles[a.id] = a
    print("Done")
示例11: setUpClass
def setUpClass(cls):
    """Load the fixtures and parse the HTML into a shared ``Article``.

    The JSON read from ``INPUT_JSON`` is kept as ``cls.valid_data``.  Its
    ``url`` and ``source`` values are set on a fresh ``Article``, which is
    passed to ``parse`` with the contents of ``INPUT_HTML`` and then stored
    as ``cls._crawled_article``.
    """
    with open(INPUT_JSON) as json_file:
        cls.valid_data = json.load(json_file)
    with open(INPUT_HTML) as html_file:
        page_source = html_file.read()

    crawled = Article()
    crawled.url = cls.valid_data['url']
    crawled.source = cls.valid_data['source']
    parse(crawled, page_source)
    cls._crawled_article = crawled
示例12: create_root
def create_root(cls, site=None, title="Root", **kwargs):
    """Return the root node for *site*, creating it on first use.

    Falls back to ``Site.objects.get_current()`` when *site* is falsy.  If
    a root node already exists for the site, the first one is returned.
    Otherwise a new node is created together with an ``Article`` whose
    first revision receives *title* and any extra keyword arguments.
    """
    target_site = site or Site.objects.get_current()
    existing_roots = cls.objects.root_nodes().filter(site=target_site)
    if existing_roots:
        return existing_roots[0]

    # (get_or_create does not work for MPTT models??)
    new_root = cls.objects.create(site=target_site)
    root_article = Article(title=title)
    first_revision = ArticleRevision(title=title, **kwargs)
    root_article.add_revision(first_revision, save=True)
    root_article.add_object_relation(new_root)
    return new_root
示例13: speed_test
def speed_test(uuid):
    """Time loading and parsing of the article identified by *uuid*.

    The article is loaded inside a committing ``SessionContext`` and, if
    found, parsed verbosely.  The elapsed time is measured with
    ``time.perf_counter()``, which is not affected by system clock
    adjustments.  When no article is found, that is reported instead of a
    parse time.  ``Article.cleanup()`` always runs, even on errors.
    """
    try:
        print("Starting speed test")
        t0 = time.perf_counter()
        with SessionContext(commit=True) as session:
            # Load the article
            a = Article.load_from_uuid(uuid, session)
            if a is not None:
                # Parse it and store the updated version
                a.parse(session, verbose=True)
        elapsed = time.perf_counter() - t0
        if a is None:
            # Nothing was parsed, so don't claim that parsing finished
            print("Article {0} not found; nothing was parsed".format(uuid))
        else:
            print("Parsing finished in {0:.2f} seconds".format(elapsed))
    finally:
        Article.cleanup()
示例14: reparse_api
def reparse_api(version=1):
    """Reparse an already parsed and stored article with a given UUID.

    The UUID is read from the ``id`` form field, stripped and truncated to
    ``_MAX_UUID_LENGTH``.  If the article is found it is parsed again with
    a freshly loaded parser, and its tokens, name register and statistics
    are returned as JSON.  If it is not found, the response still has
    ``valid=True`` but ``result`` is ``None`` and ``register`` and
    ``stats`` are empty.

    An unsupported *version* yields ``valid=False`` as a real boolean.
    The previous string ``"False"`` is truthy and did not match the
    boolean used by ``article_api``.
    """
    if not (1 <= version <= 1):
        return better_jsonify(valid=False, reason="Unsupported version")

    uuid = request.form.get("id", "").strip()[0:_MAX_UUID_LENGTH]
    tokens = None
    register = {}
    stats = {}

    with SessionContext(commit=True) as session:
        # Load the article
        a = ArticleProxy.load_from_uuid(uuid, session)
        if a is not None:
            # Found: Parse it (with a fresh parser) and store the updated version
            a.parse(session, verbose=True, reload_parser=True)
            # Save the tokens
            tokens = a.tokens
            # Build register of person names
            register = a.create_register(session)
            stats = dict(
                num_tokens=a.num_tokens,
                num_sentences=a.num_sentences,
                num_parsed=a.num_parsed,
                ambiguity=a.ambiguity,
            )

    # Return the tokens as a JSON structure to the client,
    # along with a name register and article statistics
    return better_jsonify(valid=True, result=tokens, register=register, stats=stats)
示例15: generate_relation_dict
def generate_relation_dict(self, news_sources, news_targets):
    '''
    generates a dictionary of string/list(int) in the format
    {source : target_count}
    ie. {s1 : [tc1, tc2, ... tcn],
         s2 : [tc1, tc2, ... tcn], ...
         sn : [tc1, tc2, ... tcn]}
    where sn is the source, tcn is the citation count of each target

    news_sources maps a source name to its homepage URL.  The counts for
    each source follow the iteration order of the keys of news_targets,
    and target names are compared case-insensitively.

    Works on Python 2 and 3: uses items() instead of iteritems() and no
    longer indexes into news_targets.keys().
    '''
    # Lower-case the target names once instead of rebuilding the key list
    # for every citation/target comparison.
    target_names = [name.lower() for name in news_targets]

    # initialize the relation dictionary.
    relation_dict = {}
    for source_name, source_url in news_sources.items():
        # one counter per target, in the same order as target_names
        target_count = [0] * len(target_names)

        # Find the articles which have a specific source website url
        website = Website.objects(homepage_url=source_url).only('homepage_url').first()
        articles = Article.objects(
            Q(website=website) & Q(citations__exists=True)).only('citations')

        for article in articles:
            # Count the times that each target in the news_targets is in the
            # citation list for each article and put it in the target_count
            for citation in article.citations:
                if isinstance(citation, int):
                    # integer entries are skipped, as in the original
                    continue
                cited_name = citation.target_name.lower()
                for i, target_name in enumerate(target_names):
                    if cited_name == target_name:
                        target_count[i] += 1

        relation_dict[source_name] = target_count
    return relation_dict