本文整理汇总了Python中analyzer.Analyzer类的典型用法代码示例。如果您正苦于以下问题:Python Analyzer类的具体用法?Python Analyzer怎么用?Python Analyzer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Analyzer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: atuser_uid_parser
def atuser_uid_parser(self, response):
    """Resolve an @-mentioned user's uid from the response page and cache it.

    The uid is stored in self.atuser_dict keyed by the nickname carried in
    the request meta.
    """
    # NOTE: the original also built an unused WeibospiderItem() here;
    # removed as dead code.
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
    uid = friendcircle.get_user_uid(total_pq)
    self.atuser_dict[response.meta['atuser_nickname']] = uid
示例2: classification_preprocess_all_datasets
def classification_preprocess_all_datasets():
"""
Preprocesses all datasets to be ready for classification task.
This will include stemming, word correction, lower-casing, hashtag removal, special char removal.
"""
for i in range(0,len(utils.annotated_datasets)):
tweetlines = utils.get_dataset(utils.annotated_datasets[i])
tweets = []
for line in tweetlines:
if len(line)>1:
tweets.append(tweet.to_tweet(line))
# tweets = lower_case(tweets)
tweets = remove_hastags_and_users(tweets)
tweets = count_emoticons(tweets)
tweets = replace_links(tweets)
tweets = remove_specialchars(tweets)
tweets = correct_words(tweets)
tweets = stem(tweets)
tweets = tokenize(tweets)
tweets = pos_tag(tweets)
tweets = count_exclamations(tweets)
analyzer = Analyzer(utils.annotated_datasets[i], tweets)
stats = analyzer.analyze()
print stats
#store tweets in pickles...
print "Storing pickles..."
utils.store_pickles(tweets, utils.annotated_datasets[i][24:len(utils.annotated_datasets[i])-4])
示例3: analyze
def analyze(self):
    """Read the segment: the first line names the vicar, every later line
    is handed to an Analyzer bound to that vicar."""
    for position, entry in enumerate(self.segment):
        if position:
            Analyzer(entry, self.vicar).analyze()
        else:
            self.vicar.name = entry
示例4: update_music_data
def update_music_data(self):
    """Synchronize the analyzed-features shelve with the Banshee library.

    Drops shelved entries for songs no longer in Banshee, computes and saves
    features for new songs, then rebuilds self.music_data as a numpy array
    of all stored feature vectors.
    """
    analyzer = Analyzer()
    music_list = self.banshee.get_tracks()
    # Hoist membership tests into a set: O(1) lookups instead of scanning
    # the track list once per shelved song.
    current_tracks = set(music_list)
    # Delete previously analyzed songs no longer existing in Banshee.
    # Iterate a snapshot of the keys: deleting from the shelve while
    # iterating it directly mutates the underlying dict mid-iteration.
    for mp3 in list(self.music_shelve):
        if mp3 not in current_tracks:
            del self.music_shelve[mp3]
    self.music_shelve.sync()
    song_count = len(music_list)
    progress = Progress("Analyzing Songs", song_count)
    # Calculate and save features of new songs.
    for mp3 in music_list:
        if mp3 not in self.music_shelve:
            features = analyzer.compute_features(mp3)
            if analyzer.valid_features(features):
                self.music_shelve[mp3] = features
                self.music_shelve.sync()
        progress.display()
    # Convert music data to array.
    self.music_data = np.array(self.music_shelve.values())
示例5: parse_keyuser
def parse_keyuser(self, response):
    """Extract the uids of users found via keyword search into an item."""
    parser = Analyzer()
    page_pq = parser.get_html(response.body, 'script:contains("feed_content wbcon")')
    result = WeibospiderItem()
    result['keyword_uid'] = parser.get_keyuser(page_pq)
    result['keyword'] = response.meta['keyword']
    return result
示例6: parse_secondload
def parse_secondload(self, response):
    """Fill content, time and at/repost users into the item carried in meta."""
    result = response.meta['item']
    parser = Analyzer()
    page_pq = parser.get_mainhtml(response.body)
    result['content'] = parser.get_content(page_pq)
    result['time'] = parser.get_time(page_pq)
    result['atuser'], result['repostuser'] = parser.get_atuser_repostuser(page_pq)
    return result
示例7: test_pipe_path_winxp
def test_pipe_path_winxp(p):
    """Pipe paths use the XP-style prefix before 6.x and the NT one after."""
    analyzer = Analyzer()
    cases = (
        (osversion(5, 1), "\\\\.\\PIPE\\foo"),
        (osversion(6, 1), "\\??\\PIPE\\foo"),
    )
    for version, expected in cases:
        p.return_value = version
        assert analyzer.get_pipe_path("foo") == expected
示例8: main
def main():
    """Fetch cryptopals challenge-4 data and brute-force each hex-encoded line."""
    brute = Analyzer()
    payload = requests.get("http://cryptopals.com/static/challenge-data/4.txt").content
    for encoded in payload.split('\n'):
        brute.incremental_brute(encoded.decode("hex"))
    brute.getCurrent()
示例9: parse_load
def parse_load(self, response):
    """Parse a weibo page load and yield one populated item.

    Note: the original jammed three statements on one semicolon line and
    constructed an unused FriendCircle(); both cleaned up here.
    """
    item = WeibospiderItem()
    analyzer = Analyzer()
    total_pq = analyzer.get_mainhtml(response.body)
    item['uid'] = response.meta['uid']
    item['content'] = analyzer.get_content(total_pq)
    item['time'], item['timestamp'] = analyzer.get_time(total_pq)
    # atuser_info is discarded; only the repost user is kept on the item.
    atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(total_pq)
    yield item
示例10: parse_keyword_info
def parse_keyword_info(self, response):
    """Collect search-result details (uid, alias, content, publish time) into an item."""
    result = WeibospiderItem()
    parser = Analyzer()
    page_pq = parser.get_html(response.body, 'script:contains("feed_content wbcon")')
    extractor = keyword_info_analyzer()
    (result['keyword_uid'],
     result['keyword_alias'],
     result['keyword_content'],
     result['keyword_publish_time']) = extractor.get_keyword_info(page_pq)
    result['keyword'] = response.meta['keyword']
    return result
示例11: parse_total_page
def parse_total_page(self, response):
    """Determine how many search-result pages exist and enqueue page requests."""
    parser = Analyzer()
    page_pq = parser.get_html(response.body, 'script:contains("W_pages")')
    extractor = keyword_info_analyzer()
    # Total number of result pages available for this keyword.
    total_pages = extractor.get_totalpages(page_pq)
    # Deliberately limited to the first page; replace range(1) with
    # range(total_pages) to crawl everything.
    for page in range(1):
        search_url = response.meta['search_url'] + str(page + 1)
        yield Request(url=search_url,
                      meta={'cookiejar': response.meta['cookiejar'], 'keyword': response.meta['keyword']},
                      callback=self.parse_keyword_info)
示例12: parse_thirdload
def parse_thirdload(self, response):
    """Build an item with uid, content, time and at/repost users from the page."""
    result = WeibospiderItem()
    parser = Analyzer()
    page_pq = parser.get_mainhtml(response.body)
    result['uid'] = response.meta['uid']
    result['content'] = parser.get_content(page_pq)
    result['time'] = parser.get_time(page_pq)
    result['atuser'], result['repostuser'] = parser.get_atuser_repostuser(page_pq)
    return result
示例13: main
def main(path):
    """Load every *.zip report under path and run the Analyzer over them."""
    reports = []
    for entry in listdir(path):
        full_path = join(path, entry)
        if isfile(full_path) and fnmatch(entry, '*.zip'):
            print >> sys.stderr, "Processing report %s" % full_path
            reports.append(report.Report(full_path))
    Analyzer(reports).run()
示例14: run
def run(self):
# info = urllib2.urlopen(self.url).info()
html_name, url_name = scanner([self.url], "000")
a = Analyzer(html_name, url_name)
print self.url
self.model.mydata = self.model.mydata + [(self.url, a.getAds()[1], a.getAds()[0], a.getUniqueVisitors(), "0")]
self.model.emit(SIGNAL("layoutChanged()"))
示例15: parse
def parse(path, f=None):
    """Parse the file at path, analyze it, and pack the result.

    Returns the packed output when f is None; otherwise packs into f.
    """
    source = Parser(path=path)
    source.parse_file()
    analysis = Analyzer(parser=source)
    analysis.analyze()
    packer = Packer(analyzer=analysis)
    if f is not None:
        packer.pack(f=f)
        return None
    return packer.pack()