This article collects typical usage examples of the article.Article method in Python: what exactly article.Article does, how it is called, and what real-world uses look like. The curated code examples below may help; you can also explore further usage examples from the containing module article.
The following shows 8 code examples of the article.Article method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
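Every example below assumes an article module that exposes an Article class; each project quoted here defines its own constructor. As a purely illustrative sketch (an assumption for orientation, not any of the actual modules below), such a class typically just bundles text with metadata:

# Illustrative sketch only -- each example below ships its own Article class.
class Article:
    def __init__(self, title=None, url=None, body=None, **metadata):
        self.title = title
        self.url = url
        self.body = body
        self.metadata = metadata  # project-specific fields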
Example 1: load_data
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import os, import json
def load_data(self, path, is_dir=False):
    if is_dir:
        filenames = [name for name in os.listdir(path) if not name.startswith(".")]
    else:
        filenames = [path]
    for filename in filenames:
        with open(os.path.join(path, filename), 'r', encoding="utf-8") as fh:
            tp = json.load(fh)
            for article in tp:
                try:
                    self.corpus.append(Article(article))
                except Exception:
                    print("Failed to parse an article in file %s" % filename)
Example 2: crawl
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import feedparser, pytz; from datetime import datetime; from time import mktime
def crawl(url, username, full_articles=True):
    articles = list()
    d = feedparser.parse(url)
    for entry in d["entries"]:
        if 'published_parsed' in entry:
            pubdate = pytz.utc.localize(datetime.fromtimestamp(mktime(entry['published_parsed'])))
        else:
            pubdate = pytz.utc.localize(datetime.fromtimestamp(mktime(entry['updated_parsed'])))
        articles.append(Article(
            title=entry['title'],
            url=entry['link'],
            body=entry["content"][0]["value"] if 'content' in entry else entry["summary"],
            username=username,
            pubdate=pubdate,
        ))
    return articles
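A hedged usage sketch for the crawler; the feed URL is a placeholder, and attribute access on the results assumes this project's Article stores its keyword arguments:

# Hypothetical invocation with a placeholder RSS/Atom feed URL.
articles = crawl("https://example.com/feed.xml", username="alice")
for a in articles:
    print(a.title, a.pubdate)  # assumes Article keeps these as attributes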
Example 3: read_parses
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import json, plus the host project's Sentence class and `logs` stream
def read_parses(parse_path, relations_dict=None):
    parses = [json.loads(x) for x in open(parse_path)]
    for doc_id in parses[0]:
        print >> logs, "Doc ID:%s" % doc_id  # Python 2 print-to-file syntax
        doc = parses[0][doc_id]
        sentences = []
        for sid, sen in enumerate(doc['sentences']):
            parse_tree = sen['parsetree']
            dep_tree = sen['dependencies']
            words = sen['words']
            # provided by kong
            if sid == 0 and words[0][1]['CharacterOffsetBegin'] < 8:
                words[0][1]['CharacterOffsetBegin'] += 6
            sentences.append(Sentence(sid, parse_tree, dep_tree, words))
        if relations_dict is not None:
            relations = relations_dict[doc_id]
        else:
            relations = []
        params = {'sens': sentences, 'rels': relations}
        yield Article(doc_id, params)
Example 4: read_parses
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import json, plus the host project's Sentence class and `logs` stream
def read_parses(parse_path, relations_dict=None):
    parses = [json.loads(x) for x in open(parse_path)]
    for doc_id in parses[0]:
        print >> logs, "Doc ID:%s" % doc_id  # Python 2 print-to-file syntax
        doc = parses[0][doc_id]
        sentences = []
        for sid, sen in enumerate(doc['sentences']):
            parse_tree = sen['parsetree']
            dep_tree = sen['dependencies']
            words = sen['words']
            sentences.append(Sentence(sid, parse_tree, dep_tree, words))
        if relations_dict is not None:
            relations = relations_dict[doc_id]
        else:
            relations = []
        params = {'sens': sentences, 'rels': relations}
        yield Article(doc_id, params)
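Both read_parses variants are generators, so a caller iterates them; a minimal sketch, assuming the first line of the parses file is a JSON object keyed by document ID (as the code above implies):

# Hypothetical driver loop; the file name is a placeholder.
for art in read_parses("parses.json"):
    pass  # each art is an Article built from Sentence objects and relations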
Example 5: _get_article_list
# Required module: import article [as alias]
# Or: from article import Article [as alias]
def _get_article_list(self):
    """Fetch the HTML of every article in this column (zhuanlan)."""
    article = Article()
    article_list = []
    for i in range(self.zhuanlan_dict['post_count']):
        url = 'https://zhuanlan.zhihu.com/api/columns/{0}/posts?limit=1&offset={1}'.format(self.slug, i)
        article_list.append(article.get_article_html(url=url))
    return article_list
Example 6: predict
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import os, csv, joblib, and the project's utilities module
def predict(input_dir):
    '''
    Get genre probabilities for each text document in input directory.
    '''
    clf = joblib.load('model.pkl')
    # Note: Python 2 idioms below ('wb' csv output, str.decode).
    with open('results.csv', 'wb') as fh:
        writer = csv.writer(fh, delimiter='\t')
        writer.writerow(['Filename'] + [utilities.genres[g][0].split('/')[0]
                                        for g in utilities.genres])
        for filename in [f for f in os.listdir(input_dir) if f.endswith('.txt')]:
            with open(input_dir + os.sep + filename) as ifh:
                print('Processing file: ' + filename)
                row = []
                row.append(filename)
                # Read input file
                doc = ifh.read().decode('utf-8')
                # Create article object and calculate features
                art = article.Article(text=doc)
                features = [art.features[f] for f in utilities.features]
                # Get probability for each genre
                proba = clf.predict_proba([features])[0]
                # Save results
                for g in utilities.genres:
                    row.append(str(proba[g - 1])[:6])
                writer.writerow(row)
                print(row[1:])
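Usage is a single call; a hedged sketch, assuming a directory of plain-text files and a previously trained model.pkl next to the script:

# Hypothetical call -- "texts" is a placeholder directory of *.txt files.
predict("texts")  # writes tab-separated genre probabilities to results.csv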
Example 7: index
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: os, json, joblib, the project's utilities module, and bottle's request object
def index():
    '''
    Return the probability for each genre.
    '''
    if not (request.query.text or request.query.url):
        return 'invoke with ?text= or ?url='
    if request.query.text:
        art = article.Article(text=request.query.text)
    elif request.query.url:
        art = article.Article(url=request.query.url)
    example = [art.features[f] for f in utilities.features]
    abs_path = os.path.dirname(os.path.realpath(__file__))
    clf = joblib.load(abs_path + os.sep + 'model.pkl')
    proba = clf.predict_proba([example])[0]
    resp = {}
    for i, p in enumerate(proba):
        resp[utilities.genres[i + 1][0].split('/')[0]] = str(p)[:6]
    resp = json.dumps(resp)
    if request.query.callback:
        resp = request.query.callback + '(' + resp + ')'
    return resp
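The handler reads text or url from the query string (a bottle-style route), so it can be exercised over HTTP; a hedged sketch, with host, port, and route path as assumptions:

# Hypothetical client call against a locally running instance.
import requests
r = requests.get("http://localhost:8080/", params={"text": "Some news story..."})
print(r.text)  # JSON mapping each genre name to a probability string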
Example 8: generate_training
# Required module: import article [as alias]
# Or: from article import Article [as alias]
# Also requires: import csv and the project's utilities module
def generate_training(self, path):
    '''
    Generate training data from a list of labeled articles.
    '''
    # Note: Python 2 file modes below ('rU', 'wb').
    with open(path, 'rU') as fh:
        db = csv.DictReader(fh, delimiter='\t')
        with open('data/training.txt', 'wb') as out_fh:
            fieldnames = ['url', 'label'] + utilities.features
            writer = csv.DictWriter(out_fh, fieldnames=fieldnames,
                                    delimiter='\t')
            writer.writeheader()
            for i, row in enumerate(db):
                # Get url
                url = None
                if row['Identifier']:
                    url = row['Identifier']
                elif (row['Prediction'] != 'None' and
                      float(row['Confidence']) > 0.675):
                    url = row['Prediction']
                else:
                    continue
                if not url.endswith(':ocr'):
                    url += ':ocr'
                # Get label
                label = None
                for g in utilities.genres:
                    if row['Genre'] in utilities.genres[g]:
                        label = g
                        break
                if not label:
                    continue
                # If a valid training instance is found, create a new article
                try:
                    art = article.Article(url=url)
                    # Save results
                    fields = {'label': label, 'url': url}
                    for f in utilities.features:
                        fields[f] = art.features[f]
                    writer.writerow(fields)
                except (IOError, AssertionError) as e:
                    print('Error processing article ' + url + ': '
                          + repr(e))
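A hedged usage sketch for the trainer; the TSV path is a placeholder, and the expected columns (Identifier, Prediction, Confidence, Genre) follow from the code above:

# Hypothetical call -- `classifier` is an instance of the host class.
classifier.generate_training("data/labels.tsv")  # writes data/training.txt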