本文整理汇总了Python中extractor.Extractor类的典型用法代码示例。如果您正苦于以下问题:Python Extractor类的具体用法?Python Extractor怎么用?Python Extractor使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Extractor类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_cond
def test_cond(self):
    """A single mask condition should produce features for the first key."""
    from masks import mask
    extractor = Extractor()
    logging.debug(extractor)
    extractor.add_feature_condition(mask)
    extracted = extractor.extract(self.data)
    first_key = self.data.keys()[0]
    self.assertTrue(len(extracted[first_key]) > 0)
示例2: __init__
class Framework:
    """Orchestrates the full classification pipeline: load subjects,
    extract their features, train a classifier, then classify."""

    def __init__(self, classifier):
        """Keep the supplied classifier and create the feature extractor."""
        self.classifier = classifier
        self.extractor = Extractor()

    def _create_subjects(self):
        """Build one Subject per data file (file indices are 1-based)."""
        return [Subject(FILE_NAMES['NormROIS'] % (file_index + 1))
                for file_index in range(NUM_OF_SUBJECTS)]

    def _train(self, classifier, features):
        """Train the given classifier on the extracted features."""
        classifier.train(features)

    def _classify(self, classifier):
        """Run classification with the (already trained) classifier."""
        classifier.classify()

    def execute(self):
        """Run the pipeline end to end: load, extract, train, classify."""
        # 1) Load the data files
        subjects = self._create_subjects()
        # 2) Extract the features
        self.extractor.extract_features(subjects)
        # BUG FIX: removed the leftover debug statement
        #     print len(self.extractor.features['P']), exit()
        # which printed a feature count and then called exit(), raising
        # SystemExit and aborting the pipeline before training ever ran.
        # 3) Train the classifier
        self._train(self.classifier, self.extractor.features)
        # 4) Classify some data
        self._classify(self.classifier)
示例3: install
def install(self):
    """Download, extract, and install this package, then record its status.

    Does nothing if the package database already marks the package as
    "installed". On success the status is persisted; on failure an error
    is reported via misc.print_error with True (presumably a fatal/abort
    flag -- TODO confirm).
    """
    # Check if package installed
    db = hpakDB(self.pkg_name)
    if db.get_value("status") == "installed":
        misc.print_error("%s - already installed!" % (self.pkg_name), False)
        return
    self.prepare_install()
    # Fetch the package archive from its configured source.
    dl = download(self.options['source'], self.pkg_path, self.pkg_name)
    dl.get()
    # Extracting the file.
    e = Extractor(self.options)
    e.extract()
    # Install depends
    self.install_dep()
    # Run the package's comma-separated install commands one at a time.
    # NOTE(review): shell=True on commands taken from package metadata is
    # a shell-injection risk if that metadata is untrusted -- confirm.
    Cmds = self.options['install'].split(',')
    for cmd in Cmds:
        subprocess.Popen(cmd, shell=True).wait()
    # Verify package installed.
    if os.path.exists("%s/%s" % (HPAK_ROOT, self.options['dir'])):
        db = hpakDB(self.pkg_name)
        db.set_value("status", "installed")
        misc.print_success("%s installed." % (self.pkg_name))
    else:
        misc.print_error("%s-%s NOT installed, please try again." % (self.pkg_name, self.options['version']), True)
示例4: test_extractorResultGetJson
def test_extractorResultGetJson(self):
    """get_json() should serialize the strategy's data dict as JSON."""
    fake_strategy = mock.MagicMock()
    fake_strategy.get_data.return_value = {"success": True}
    outcome = Extractor(fake_strategy).get_result()
    self.assertEqual('{"success": true}', outcome.get_json())
示例5: __init__
def __init__(self):
    """Collect and sort every feature name, then initialize the base Extractor."""
    direct_names = [name for (name, _pattern) in self.DIRECT_FEATS]
    lemma_names = [name for (name, _pattern) in self.LEMMA_FEATS]
    self.__featureNames = sorted(direct_names + lemma_names + self.CALCULATED_FEATS)
    Extractor.__init__(self)
示例6: ext_json
def ext_json():
    """Flask endpoint: parse an RDF/XML document POSTed in form field
    'data', extract its URIs, and return a JSON mapping of each URI to
    its tokenized term, source line number, and status.

    Returns a JSON Response on success, a warning string on connection
    errors, and implicitly None for non-POST requests.
    """
    rdfUrl = ''
    tok = Tokenizer()
    if request.method == 'POST':
        rdf = request.form['data']
        # Live URI status checks and URL filtering are currently disabled;
        # the original form lookups are kept as commented-out hints.
        status_test = "0"  # request.form['status']
        filters = ""  # request.form['exculdeurls']
        # rdf = "http://jpp.no-ip.org/MAD_J.rdf"
        try:
            # r = requests.get(rdf)
            gg = Graph()
            # g.load(rdfUrl)
            # Parse the posted text as UTF-8 encoded RDF/XML.
            rdf_content = StringIO.StringIO(rdf.encode('utf-8'))
            # print rdf_content.readline()
            gg.parse(rdf_content, format="xml")
            ext = Extractor(gg)
            uris = ext.getUris()
            mapping = MapFactory()
            for uri in uris:
                term = tok.tokenized_url(uri)
                uri_status = ""
                if status_test == "1":
                    uri_status = ext.testUri(uri)
                else:
                    uri_status = "N/A"
                # Append a closing quote so the line lookup matches the
                # exact attribute value in the serialized XML -- presumably.
                uri_lookup = str(uri)+"\""
                # NOTE(review): rdf_content was already consumed by
                # gg.parse above; assumes get_lines rewinds or re-reads
                # the buffer -- confirm.
                lnum = ext.get_lines(rdf_content, uri_lookup)
                ent = MapEntry(uri, term, "", lnum, uri_status)
                mapping.add(ent)
            jsonized_result = json.dumps(mapping.get())
            return Response(jsonized_result, mimetype='application/json')
        except requests.exceptions.ConnectionError:
            # NOTE(review): the requests call is commented out above, so
            # this handler appears unreachable now.
            X2Rwarning = 'X2R Warning: The requested URL raises ConnectionError~!!!'
            return X2Rwarning
示例7: test_extractorResultGetData
def test_extractorResultGetData(self):
    """get_data() should expose the strategy's raw data dict."""
    fake_strategy = mock.MagicMock()
    fake_strategy.get_data.return_value = {"success": True}
    outcome = Extractor(fake_strategy).get_result()
    self.assertTrue(outcome.get_data()["success"])
示例8: process
def process(self, file):
    """Count POS uni-/bi-/trigram frequencies (normalized by token count)
    for one CQP-format corpus file and return (document_id, features)."""
    Extractor.process(self, file)
    reader = InputReader(file)
    reader.read()
    corpus = CQPFormat(reader.getText())
    pos_tags = corpus.getColumn(1)
    # Start every known n-gram name at zero.
    counts = dict.fromkeys(self.posnames, 0)
    # Skip the first two positions so every trigram has full context.
    for idx in range(2, len(pos_tags)):
        unigram = pos_tags[idx][0:3]
        bigram = pos_tags[idx - 1][0:3] + "_" + unigram
        trigram = pos_tags[idx - 2][0:3] + "_" + bigram
        for gram in (unigram, bigram, trigram):
            if gram in counts:
                counts[gram] += 1
    # Normalize each count by the number of positions actually scanned.
    denominator = float(len(pos_tags) - 2)
    for name in self.posnames:
        counts[name] /= denominator
    return reader.getID(), counts
示例9: predict
def predict(article_link, image_link):
    """Predict the emotional reaction to an article/image pair.

    output: predicted emotion as: [ 0.  1.  0.  0.  0.]

    Returns (reaction, primary_taxonomy_label): the first predicted
    reaction and one top-level Alchemy taxonomy label for the article.
    """
    # Extract features for the single user-supplied article/image pair.
    e = Extractor()
    user_input = {
        "article_link": article_link,
        "image_link": image_link
    }
    friendly_json = e.user_extract(user_input)
    # Reduce the taxonomy entries to their top-level segment, e.g.
    # "/art and entertainment/movies" -> "art and entertainment".
    tax_list = friendly_json['alchemy']['taxonomy']
    tax_primary = []
    for t in tax_list:
        tax_primary.append(t['label'].split('/')[1])
    # NOTE(review): set() ordering is arbitrary, so which label survives
    # when several distinct ones exist is unspecified -- confirm intent.
    tax_primary = list(set(tax_primary))[0]
    extracted_articles = dict()
    extracted_articles['articles'] = [friendly_json]
    # Derive emotion feature vectors from the text and the picture.
    textEmotions = text_emotions_x(extracted_articles)
    picEmotions = picture_emotions_x(extracted_articles)
    # Load the pre-trained classifier (Python 2 cPickle, text-mode file).
    with open('emotionClassification/trained_models/bbac_1150_all_clf.pkl','r') as f:
        clf = cPickle.load(f)
    test_article = makeDataMatrix(textEmotions, picEmotions)
    reaction = predictReactions(clf, test_article)
    return reaction[0], tax_primary
示例10: process
def process(self,file):
    """Extract regex-, lemma-, and statistics-based features from one
    CQP-format corpus file.

    Returns (document_id, feature_dict).
    """
    feats = {}
    Extractor.process(self,file)
    ir = InputReader(file)
    ir.read()
    cqpf = CQPFormat(ir.getText())
    #words = ' '.join(cqpf.getColumn(0))
    #pos = ' '.join(self.disambiguatePOS(cqpf.getColumn(1)))
    lemma = cqpf.getColumn(2)
    sentences = cqpf.getAnnotations("s")
    # Rebuild the document as word/POS tokens wrapped in explicit
    # <s>...</s> markers so the feature regexes can anchor on
    # sentence boundaries.
    wordpostmp = []
    for (start,end,attr) in sentences:
        wordpostmp.append('<s>')
        wordpostmp.extend(self.getWordsWithPOS(
            cqpf.getColumn(0)[start:end],
            self.disambiguatePOS(cqpf.getColumn(1)[start:end])))
        wordpostmp.append('</s> ')
    wordpos = ' '.join(wordpostmp)
    # Apply each feature family in turn to the rebuilt text / lemmas.
    feats.update(self.extractWithREs(self.DIRECT_FEATS,wordpos))
    feats.update(self.extractWithREs(self.CALC_FEATS,wordpos))
    feats.update(self.extractFromLemmatatizedForms(self.LEMMA_FEATS,lemma))
    self.calculateFeats(feats)
    # Normalize by document length (number of lemma tokens).
    self.normalizeByLength(feats, len(lemma))
    feats.update(self.extractStatistics(cqpf))
    # NOTE(review): Python 2 print; looks like a leftover debug trace.
    print feats
    return ir.getID(),feats
示例11: ExtractorTest
class ExtractorTest(unittest.TestCase):
    """Verify that Extractor.extract_data parses every field of a raw
    LOQ run-header line into the expected attributes."""

    def setUp(self):
        raw_header = "LOQ75625Team LOQ 49% blend std 8mm 21-JUN-2000 12:55:23 30.0"
        self.extractor = Extractor()
        self.extractor.extract_data(raw_header)

    def tearDown(self):
        pass

    def test_extract_valid_instrument_name(self):
        self.assertEqual("LOQ", self.extractor.instrumentname)

    def test_extract_valid_run_number(self):
        self.assertEqual("75625", self.extractor.runnumber)

    def test_extract_valid_run_username(self):
        self.assertEqual("Team LOQ", self.extractor.username)

    def test_extract_valid_run_experimenttitle(self):
        self.assertEqual("49% blend std 8mm", self.extractor.experimenttitle)

    def test_extract_valid_run_startdate(self):
        self.assertEqual("21-JUN-2000", self.extractor.startdate)

    def test_extract_valid_run_starttime(self):
        self.assertEqual("12:55:23", self.extractor.starttime)

    def test_extract_valid_run_charge(self):
        self.assertEqual("30.0", self.extractor.charge)
示例12: generateFeatures
def generateFeatures(self):
    '''
    Has been hardcoded for wikipedia.
    For each category, fetch Wiki-pages from list.txt and store the
    keywords (links in the specified section) in features.txt.

    Each line of a category list file is "<page>[\t<section>]"; when no
    section is given, section 0 is used. Every link is cleaned, split
    into units, stemmed, filtered by self.valid, and written
    comma-separated.
    '''
    e = Extractor()
    print self.categories
    for name in self.categories:
        print name
        f = open("%s/%s/%s" % (self.config.get(self.section, "CLASSES_FILE"), name, self.config.get(self.section, "LIST_FILE")), "r")
        g = open("%s/%s/%s" % (self.config.get(self.section, "CLASSES_FILE"), name, self.config.get(self.section, "FEATURE_FILE")), "w")
        for page in f:
            print page
            pagetok = page.strip().split('\t')
            # NOTE(review): the bare except silently maps ANY failure to
            # section 0, not just a missing field -- an explicit length
            # check would be safer.
            try: section = pagetok[1]
            except: section = 0
            links = e.getWikiLinks(pagetok[0], section = section)
            for feature in links:
                # Split the link into word units; stem and keep the
                # valid ones.
                units = set(self.clean(feature).split('_'))
                for unit in units:
                    unit = self.stemmer.stem(unit)
                    if self.valid(unit):
                        g.write("%s," % unit)
            g.write("\n")
        # NOTE(review): files are not closed if an exception escapes the
        # loop above; a with-statement would guarantee cleanup.
        f.close()
        g.close()
示例13: add_synset
def add_synset(self, word):
    """Register *word* and index its Wikipedia backlinks as synonyms.

    The word is keyed by the md5 hex digest of its text; each backlink
    is stored upper-cased under the SYN prefix, pointing back at the
    word id.
    """
    ex = Extractor()
    word_id = md5.md5(word).hexdigest()
    # NOTE(review): the existence check reads the bare word_id while the
    # write below stores under ROOT + word_id -- unless fdb.get applies
    # the ROOT prefix itself, this check can never hit. Confirm.
    if not self.fdb.get(word_id):
        self.fdb.set(ROOT + word_id, word)
        synset = ex.getWikiBacklinks(word)
        if synset:
            for synonym in synset:
                self.fdb.set(SYN + synonym.upper(), word_id)
示例14: test_monotony
def test_monotony(self):
    """Raising and Falling monotony masks should yield features for the first key."""
    from masks import absolute_monotony as monotony
    extractor = Extractor()
    logging.debug(extractor)
    extractor.add_feature_condition(monotony.Raising)
    extractor.add_feature_condition(monotony.Falling)
    extracted = extractor.extract(self.data)
    logging.debug("res: \n%s", pprint.pformat(extracted))
    first_key = self.data.keys()[0]
    self.assertTrue(len(extracted[first_key]) > 0)
示例15: process
def process(self,file):
feats = {}
Extractor.process(self,file)
ir = InputReader(file)
ir.read()
cqpf = CQPFormat(ir.getText())
lengths = [end-start for (start,end,arg) in cqpf.getAnnotations("s")]
print self.__featureNames
feats = utils.getStats("SENT_LENGTH", lengths)
return ir.getID(),feats