当前位置: 首页>>代码示例>>Python>>正文


Python extractor.Extractor类代码示例

本文整理汇总了Python中extractor.Extractor的典型用法代码示例。如果您正苦于以下问题:Python Extractor类的具体用法?Python Extractor怎么用?Python Extractor使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Extractor类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_cond

 def test_cond(self):
     """A mask-based feature condition yields at least one feature for the first data key."""
     from masks import mask
     extractor = Extractor()
     logging.debug(extractor)
     extractor.add_feature_condition(mask)
     extracted = extractor.extract(self.data)
     first_key = self.data.keys()[0]
     self.assertTrue(len(extracted[first_key]) > 0)
开发者ID:phipse,项目名称:complexlab_ai,代码行数:7,代码来源:__test_extractor.py

示例2: __init__

class Framework:
    """Pipeline that loads subjects, extracts features, trains a classifier,
    and runs classification."""

    def __init__(self, classifier):
        """Store the classifier and build the feature extractor."""
        self.classifier = classifier
        self.extractor = Extractor()

    def _create_subjects(self):
        """Create one Subject per normalized-ROI data file."""
        return [Subject(FILE_NAMES['NormROIS'] % (file_index + 1))
                    for file_index in range(NUM_OF_SUBJECTS)]

    def _train(self, classifier, features):
        """Train *classifier* on the extracted *features*."""
        classifier.train(features)

    def _classify(self, classifier):
        """Run classification with the trained *classifier*."""
        classifier.classify()

    def execute(self):
        """Run the full pipeline: load data, extract features, train, classify."""
        # 1) Load the data files
        subjects = self._create_subjects()
        # 2) Extract the features
        self.extractor.extract_features(subjects)
        # Removed leftover debug line `print len(...), exit()` which raised
        # SystemExit here and prevented steps 3 and 4 from ever running.
        # 3) Train the classifier
        self._train(self.classifier, self.extractor.features)
        # 4) Classify some data
        self._classify(self.classifier)
开发者ID:spirosikmd,项目名称:kimml09,代码行数:32,代码来源:framework.py

示例3: install

	def install(self):
		"""Download, extract and install this package, then record its status.

		Skips work when the package database already marks the package as
		installed.  Runs the package's comma-separated install commands in a
		shell and verifies success by checking for the install directory.
		"""
		# Check if package installed — the hpak DB is the source of truth.
		db = hpakDB(self.pkg_name)
		if db.get_value("status") == "installed":
			misc.print_error("%s - already installed!" % (self.pkg_name), False)
			return
							
		self.prepare_install()
		# Fetch the package archive from its configured source URL.
		dl = download(self.options['source'], self.pkg_path, self.pkg_name)
		dl.get()
		
		# Extracting the file.
		e =	Extractor(self.options)
		e.extract()

		# Install depends first so the install commands below can rely on them.
		self.install_dep()

		# SECURITY NOTE(review): install commands come from package metadata and
		# run through a shell (shell=True) — assumes trusted packages; confirm.
		Cmds = self.options['install'].split(',')
		for cmd in Cmds:
			subprocess.Popen(cmd, shell=True).wait()

		# Verify package installed: its target directory must now exist.
		if os.path.exists("%s/%s" % (HPAK_ROOT, self.options['dir'])):
			db = hpakDB(self.pkg_name)
			db.set_value("status", "installed")
			misc.print_success("%s installed." % (self.pkg_name))
		else:
			misc.print_error("%s-%s NOT installed, please try again." % (self.pkg_name, self.options['version']), True) 
开发者ID:blackreaven,项目名称:hpak,代码行数:29,代码来源:hpak.py

示例4: test_extractorResultGetJson

    def test_extractorResultGetJson(self):
        """get_json() serializes the strategy's data dict as a JSON string."""
        fake_strategy = mock.MagicMock()
        fake_strategy.get_data.return_value = {"success": True}
        json_result = Extractor(fake_strategy).get_result()
        self.assertEqual('{"success": true}', json_result.get_json())
开发者ID:boynux,项目名称:sarafy-gae,代码行数:7,代码来源:test_crawler.py

示例5: __init__

 def __init__(self):
     """Cache the sorted list of all feature names, then run the base initializer."""
     # Collect names from the direct and lemma feature tables plus the
     # calculated features, then sort the combined list once.
     names = [n for (n, _) in self.DIRECT_FEATS]
     names += [n for (n, _) in self.LEMMA_FEATS]
     names += self.CALCULATED_FEATS
     self.__featureNames = sorted(names)
     Extractor.__init__(self)
开发者ID:rforge,项目名称:sigil,代码行数:7,代码来源:biberextractor.py

示例6: ext_json

def ext_json():
    """Flask view: parse RDF/XML posted in the 'data' form field and return a
    JSON mapping of every URI found to its tokenized term, source line number,
    and (optionally) reachability status.
    """
    rdfUrl = ''
    tok = Tokenizer()
    if request.method == 'POST':
        rdf = request.form['data']
        # Status testing and URL filtering are currently disabled (hardcoded).
        status_test = "0"#request.form['status']
        filters = ""#request.form['exculdeurls']
        #rdf = "http://jpp.no-ip.org/MAD_J.rdf"
        try:
            #r = requests.get(rdf)
            gg = Graph()
            #g.load(rdfUrl)
            # Parse the posted RDF/XML from an in-memory UTF-8 buffer.
            rdf_content = StringIO.StringIO(rdf.encode('utf-8'))
            #print rdf_content.readline()
            gg.parse(rdf_content,  format="xml")
            ext = Extractor(gg)
            uris = ext.getUris()
            mapping = MapFactory()
            for uri in uris:
                # Tokenize the URI into a human-readable term.
                term = tok.tokenized_url(uri)
                uri_status = ""
                if status_test == "1":
                    uri_status = ext.testUri(uri)
                else:
                    uri_status = "N/A"  
                # Find the line number where this URI appears in the document.
                uri_lookup = str(uri)+"\"" 
                lnum = ext.get_lines(rdf_content, uri_lookup)          
                ent = MapEntry(uri, term, "", lnum, uri_status)
                mapping.add(ent)
            jsonized_result = json.dumps(mapping.get())              
            return Response(jsonized_result, mimetype='application/json')
        except requests.exceptions.ConnectionError:
            # NOTE(review): only ConnectionError is caught, yet the requests.get
            # call above is commented out — RDF parse errors will propagate
            # uncaught; confirm whether that is intended.
            X2Rwarning = 'X2R Warning: The requested URL raises ConnectionError~!!!'
            return X2Rwarning
开发者ID:FengPu,项目名称:x2r-me,代码行数:34,代码来源:x2r-me.py

示例7: test_extractorResultGetData

    def test_extractorResultGetData(self):
        """get_data() exposes the strategy's data dict unchanged."""
        fake_strategy = mock.MagicMock()
        fake_strategy.get_data.return_value = {"success": True}
        data_result = Extractor(fake_strategy).get_result()
        self.assertTrue(data_result.get_data()["success"])
开发者ID:boynux,项目名称:sarafy-gae,代码行数:7,代码来源:test_crawler.py

示例8: process

 def process(self,file):
     """Count uni-/bi-/trigram POS-prefix features and return (id, normalized feats)."""
     Extractor.process(self,file)
     reader = InputReader(file)
     reader.read()
     # Column 1 of the CQP file holds the POS tags.
     tags = CQPFormat(reader.getText()).getColumn(1)
     # Start every known feature name at zero.
     feats = dict((name, 0) for name in self.posnames)
     for idx in range(2, len(tags)):  # ignore first two pos ...
         unigram = tags[idx][0:3]
         bigram = tags[idx - 1][0:3] + "_" + unigram
         trigram = tags[idx - 2][0:3] + "_" + bigram
         # Only count n-grams that belong to the known feature set.
         for gram in (unigram, bigram, trigram):
             if gram in feats:
                 feats[gram] += 1
     # Normalize counts by the number of positions actually scanned.
     denominator = float(len(tags) - 2)
     for name in self.posnames:
         feats[name] /= denominator
     return reader.getID(), feats
开发者ID:rforge,项目名称:sigil,代码行数:27,代码来源:posextractor.py

示例9: predict

def predict(article_link, image_link):
    """
    output: predicted emotion as: [ 0.  1.  0.  0.  0.]

    Extracts text/image features for the article, runs them through a
    pre-trained classifier, and returns (predicted reaction, primary
    taxonomy category).
    """
    # Pull features (alchemy taxonomy, text, image) for the given article.
    e = Extractor()
    user_input = {
        "article_link": article_link,
        "image_link": image_link
    }

    friendly_json = e.user_extract(user_input)

    # Keep the first path component of each taxonomy label,
    # e.g. "/news/politics" -> "news".
    tax_list = friendly_json['alchemy']['taxonomy']
    tax_primary = []
    for t in tax_list:
        tax_primary.append(t['label'].split('/')[1])

    # NOTE(review): set() removes duplicates but has no defined order, so [0]
    # picks an arbitrary category when several remain — confirm intended.
    tax_primary = list(set(tax_primary))[0]

    extracted_articles = dict()
    extracted_articles['articles'] = [friendly_json]
    textEmotions = text_emotions_x(extracted_articles)
    picEmotions = picture_emotions_x(extracted_articles)

    # NOTE(review): pickles are binary; opening with 'rb' would be safer than
    # 'r' (works on POSIX Python 2 only) — confirm target platform.
    with open('emotionClassification/trained_models/bbac_1150_all_clf.pkl','r') as f:
        clf = cPickle.load(f)

    test_article = makeDataMatrix(textEmotions, picEmotions)

    reaction = predictReactions(clf, test_article)

    # reaction holds the predicted labels; return the first alongside the
    # article's primary taxonomy category.
    return reaction[0], tax_primary
开发者ID:franciscovargas,项目名称:FH,代码行数:32,代码来源:predict.py

示例10: process

 def process(self,file):
     """Extract regex-, lemma- and statistics-based features from *file*.

     Returns (document id, feature dict); raw counts are normalized by the
     number of lemmas in the document.
     """
     feats = {}
     Extractor.process(self,file)
     ir = InputReader(file)
     ir.read()
     cqpf = CQPFormat(ir.getText())
     #words = ' '.join(cqpf.getColumn(0))
     #pos = ' '.join(self.disambiguatePOS(cqpf.getColumn(1)))
     lemma = cqpf.getColumn(2)
     sentences = cqpf.getAnnotations("s")
     # Rebuild the text as word/POS tokens wrapped in <s> ... </s> sentence
     # markers so the regex feature sets below can anchor on boundaries.
     wordpostmp = []
     for (start,end,attr) in sentences:
         wordpostmp.append('<s>')
         wordpostmp.extend(self.getWordsWithPOS(
                             cqpf.getColumn(0)[start:end],
                             self.disambiguatePOS(cqpf.getColumn(1)[start:end])))
         wordpostmp.append('</s> ')
     wordpos = ' '.join(wordpostmp)
     # Regex-based feature families over the word/POS stream, plus
     # lemma-based features.
     feats.update(self.extractWithREs(self.DIRECT_FEATS,wordpos))
     feats.update(self.extractWithREs(self.CALC_FEATS,wordpos))
     feats.update(self.extractFromLemmatatizedForms(self.LEMMA_FEATS,lemma))
     self.calculateFeats(feats)
     # Normalize raw counts by document length (lemma count).
     self.normalizeByLength(feats, len(lemma))
     feats.update(self.extractStatistics(cqpf))
     print feats
     return ir.getID(),feats
开发者ID:rforge,项目名称:sigil,代码行数:26,代码来源:statextractor.py

示例11: ExtractorTest

class ExtractorTest(unittest.TestCase):
    """Field-by-field parsing checks for Extractor on one sample journal record."""

    def setUp(self):
        # A single raw VMS journal line covering every field the tests verify.
        record = "LOQ75625Team LOQ            49% blend std 8mm       21-JUN-2000 12:55:23    30.0"
        self.extractor = Extractor()
        self.extractor.extract_data(record)

    def tearDown(self):
        # No external resources to release.
        pass

    def test_extract_valid_instrument_name(self):
        self.assertEqual(self.extractor.instrumentname, "LOQ")

    def test_extract_valid_run_number(self):
        self.assertEqual(self.extractor.runnumber, "75625")

    def test_extract_valid_run_username(self):
        self.assertEqual(self.extractor.username, "Team LOQ")

    def test_extract_valid_run_experimenttitle(self):
        self.assertEqual(self.extractor.experimenttitle, "49% blend std 8mm")

    def test_extract_valid_run_startdate(self):
        self.assertEqual(self.extractor.startdate, "21-JUN-2000")

    def test_extract_valid_run_starttime(self):
        self.assertEqual(self.extractor.starttime, "12:55:23")

    def test_extract_valid_run_charge(self):
        self.assertEqual(self.extractor.charge, "30.0")
开发者ID:ISISComputingGroup,项目名称:VmsJournalFileConverter,代码行数:30,代码来源:extractor_test.py

示例12: generateFeatures

	def generateFeatures(self):
		'''
		Has been hardcoded for wikipedia
		For each category, fetch Wiki-pages from list.txt
		Store keywords (links in the specified section)in features.txt
		'''
		e = Extractor()
		print self.categories
		for name in self.categories:
			print name
			f = open("%s/%s/%s" % (self.config.get(self.section, "CLASSES_FILE"), name, self.config.get(self.section, "LIST_FILE")), "r")
			g = open("%s/%s/%s" % (self.config.get(self.section, "CLASSES_FILE"), name, self.config.get(self.section, "FEATURE_FILE")), "w")
			for page in f:
				print page
				pagetok = page.strip().split('\t')
				try: section = pagetok[1]
				except: section = 0
				links = e.getWikiLinks(pagetok[0], section = section)
				for feature in links:
					units = set(self.clean(feature).split('_'))
					for unit in units:
						unit = self.stemmer.stem(unit)
						if self.valid(unit):
							g.write("%s," % unit)
				g.write("\n")
			f.close()
			g.close()
开发者ID:coep-rankweb,项目名称:Webclassifier,代码行数:27,代码来源:wiki_source.py

示例13: add_synset

	def add_synset(self, word):
		"""Register *word* under its MD5 id and map each Wikipedia backlink
		(treated as a synonym) to that id in the key-value store."""
		ex = Extractor()
		word_id = md5.md5(word).hexdigest()
		# NOTE(review): the existence check reads the bare hash key, but the
		# store writes ROOT + word_id — the check can never match what was
		# written; confirm the fdb key scheme before relying on dedup here.
		if not self.fdb.get(word_id):
			self.fdb.set(ROOT + word_id, word)
		synset = ex.getWikiBacklinks(word)
		if synset:
			# Each backlink title, uppercased, points back at the word id.
			for synonym in synset:
				self.fdb.set(SYN + synonym.upper(), word_id)
开发者ID:saurabhkb,项目名称:tailor,代码行数:9,代码来源:crawler.py

示例14: test_monotony

 def test_monotony(self):
     """Raising and Falling monotony masks produce features for the first data key."""
     from masks import absolute_monotony as monotony
     extractor = Extractor()
     logging.debug(extractor)
     extractor.add_feature_condition(monotony.Raising)
     extractor.add_feature_condition(monotony.Falling)
     result = extractor.extract(self.data)
     logging.debug("res: \n%s", pprint.pformat(result))
     first_key = self.data.keys()[0]
     self.assertTrue(len(result[first_key]) > 0)
开发者ID:phipse,项目名称:complexlab_ai,代码行数:9,代码来源:__test_extractor.py

示例15: process

 def process(self,file):
     feats = {}
     Extractor.process(self,file)
     ir = InputReader(file)
     ir.read()
     cqpf = CQPFormat(ir.getText())
     lengths = [end-start for (start,end,arg) in cqpf.getAnnotations("s")]
     print self.__featureNames
     feats = utils.getStats("SENT_LENGTH", lengths)
     return ir.getID(),feats
开发者ID:rforge,项目名称:sigil,代码行数:10,代码来源:sentencelengthextractor.py


注:本文中的extractor.Extractor类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。