Python feature_extractor.FeatureExtractor类代码示例

本文整理汇总了Python中feature_extractor.FeatureExtractor类的典型用法代码示例。如果您正苦于以下问题：Python FeatureExtractor类的具体用法？Python FeatureExtractor怎么用？Python FeatureExtractor使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了FeatureExtractor类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self, texts=None, n=16, step_size=1, k=100, kmeans_args=None):
     """Record the extractor settings and initialise the base class.

     Args:
         texts: Not used by this constructor.
         n: Stored as ``self.n`` (presumably the n-gram length -- confirm).
         step_size: Stored as ``self.step_size``.
         k: Stored as ``self.k`` (presumably the number of clusters --
             confirm).
         kmeans_args: Stored as ``self.kmeans_args``.
     """
     self.kmeans_args = kmeans_args
     # ``kmeans`` starts out unset; nothing in this constructor builds it.
     self.kmeans = None
     self.n, self.step_size, self.k = n, step_size, k
     # The base initialiser runs last, after all attributes are in place.
     FeatureExtractor.__init__(self)

开发者ID:aerows，项目名称:NLP1-Project，代码行数:7，代码来源:kmeans_ngram.py

示例2: main

def main():
    """Extract ConvNet features for every image below ``dataset_path``.

    Each sub-directory of ``dataset_path`` is listed and every file in it is
    read with ``cv2.imread`` and passed to ``convnet.transform``.  One CSV row
    per file is written to ``caltech101_vggnet_fc7_features.csv``: the path
    relative to the dataset root followed by the returned feature values.
    """
    dataset_path = "/path/to/Caltech-101"
    modelzoo_path = "/path/to/VGG16"

    # create an instance
    convnet = FeatureExtractor(
            prototxt_path=os.path.join(modelzoo_path, "vgg16_deploy.prototxt"),
            caffemodel_path=os.path.join(modelzoo_path, "vgg16.caffemodel"),
            target_layer_name="fc7",
            image_size=224,
            mean_values=[103.939, 116.779, 123.68])

    # The context manager closes the CSV even when reading or transforming
    # an image raises part-way through the run.
    with open("caltech101_vggnet_fc7_features.csv", "w") as f:
        # header: the path column followed by 4096 hard-coded feature columns
        # (presumably the width of the fc7 output -- confirm).
        header = ["filepath"] + ["feat%d" % (i + 1) for i in range(4096)]
        f.write(",".join(header) + "\n")

        # extract features
        categories = os.listdir(dataset_path)
        for category in pyprind.prog_bar(categories):
            file_names = os.listdir(os.path.join(dataset_path, category))
            for file_name in file_names:
                img = cv2.imread(os.path.join(dataset_path, category, file_name))
                feat = convnet.transform(img)
                feat_str = [os.path.join(category, file_name)]
                feat_str.extend(str(value) for value in feat)
                f.write("%s\n" % ",".join(feat_str))
                # Flush after every row, as the original did, so rows reach
                # the file as they are produced.
                f.flush()

开发者ID:norikinishida，项目名称:image-feature-extraction-via-convnet，代码行数:35，代码来源:extract_features_caltech101.py

示例3: __init__

 def __init__(self, prefix='_p_', min_df=1, max_per=1.0, binarize=False, transform=None, replace_num='#',
              source=None, subdir=None, pseudotype=None, splits_file=None, stage='training', suffix='',
              lower=True, scale_factor=None):
     """Initialise the base extractor under the fixed name ``'pkl'``.

     Every argument is forwarded unchanged, by keyword, to
     ``FeatureExtractor.__init__``.  A ``transform`` of ``'tfidf'`` is
     rejected by an assertion.
     """
     assert transform != 'tfidf'
     base_kwargs = dict(
         name='pkl',
         prefix=prefix,
         min_df=min_df,
         max_per=max_per,
         binarize=binarize,
         transform=transform,
         replace_num=replace_num,
         source=source,
         subdir=subdir,
         pseudotype=pseudotype,
         splits_file=splits_file,
         stage=stage,
         suffix=suffix,
         lower=lower,
         scale_factor=scale_factor,
     )
     FeatureExtractor.__init__(self, **base_kwargs)

开发者ID:dallascard，项目名称:guac，代码行数:9，代码来源:feature_extractor_pkl.py

示例4: __init__

 def __init__(self, mode, data_type, log_csv_path, feature_path, label_path, debug_limit):
     """Initialise the base extractor and load the labels from ``label_path``.

     The label file is read line by line as two comma-separated fields.
     Rows whose first field is not made up solely of digits (for example a
     header row) are ignored, and so are blank lines.  The result is stored
     in ``self.labels`` as a dict mapping ``int(first field)`` to
     ``int(second field)``.

     Args:
         mode, data_type, log_csv_path, feature_path, debug_limit:
             Forwarded positionally to ``FeatureExtractor.__init__``.
         label_path: Path of the label file to read.

     Raises:
         ValueError: If a non-blank row does not split into exactly two
             fields, or the second field of an accepted row is not an
             integer.
     """
     FeatureExtractor.__init__(self, mode, data_type, log_csv_path, feature_path, debug_limit)
     labels = {}
     with open(label_path, 'r') as r:
         for line in r:
             line = line.strip()
             if not line:
                 # A blank (often trailing) line cannot be unpacked into two
                 # fields, so skip it instead of crashing.
                 continue
             eid, dropout = line.split(',')
             if eid.isdigit():
                 labels[int(eid)] = int(dropout)
     self.labels = labels

开发者ID:numb3r3，项目名称:kdd2015，代码行数:9，代码来源:week_feature_extractor.py

示例5: main

def main():
    """Extract features for a list of images and save them as one array.

    A pre-trained model is selected by the hard-coded ``modelname``.  Every
    key returned by ``load_keys(keys_path)`` is joined to ``data_path``, read
    with ``cv2.imread`` and passed to ``convnet.transform``; the collected
    results are saved to ``dst_path`` with ``np.save``.

    The process exits with status -1 when ``modelname`` is not recognised.
    """
    caffe_alexnet_path = "/path/to/caffe-modelzoo/AlexNet"
    caffe_vgg16_path = "/path/to/caffe-modelzoo/VGG16"
    caffe_googlenet_path = "/path/to/caffe-modelzoo/GoogleNet"
    keys_path = "/path/to/dataset/keys.txt"
    data_path = "/path/to/dataset/images"
    dst_path = "/path/to/dataset/features.npy"

    modelname = "VGG16"

    # load pre-trained model
    if modelname == "AlexNet":
        # Create the .npy mean file first when it does not exist yet.
        if not os.path.exists(os.path.join(caffe_alexnet_path, "imagenet_mean.npy")):
            convert_mean_file(caffe_alexnet_path)
        convnet = FeatureExtractor(
                prototxt_path=os.path.join(caffe_alexnet_path, "alexnet_deploy.prototxt"),
                caffemodel_path=os.path.join(caffe_alexnet_path, "alexnet.caffemodel"),
                target_layer_name="fc6",
                image_size=227,
                mean_path=os.path.join(caffe_alexnet_path, "imagenet_mean.npy")
                )
    elif modelname == "VGG16":
        convnet = FeatureExtractor(
                prototxt_path=os.path.join(caffe_vgg16_path, "vgg16_deploy.prototxt"),
                caffemodel_path=os.path.join(caffe_vgg16_path, "vgg16.caffemodel"),
                target_layer_name="fc6",
                image_size=224,
                mean_values=[103.939, 116.779, 123.68]
                )
    elif modelname == "GoogleNet":
        # Must be bound to ``convnet`` like the other branches: the loop
        # below uses that name, so any other name ends in a NameError.
        convnet = FeatureExtractor(
                prototxt_path=os.path.join(caffe_googlenet_path, "googlenet_deploy.prototxt"),
                caffemodel_path=os.path.join(caffe_googlenet_path, "googlenet.caffemodel"),
                target_layer_name="pool5/7x7_s1",
                image_size=224,
                mean_values=[104.0, 117.0, 123.0]
                )
    else:
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print("Unknown model name: %s" % modelname)
        sys.exit(-1)

    # data list
    keys = load_keys(keys_path)

    # feature extraction
    feats = []
    for key in keys:
        img = cv2.imread(os.path.join(data_path, key))
        # cv2.imread signals an unreadable file by returning None.
        assert img is not None, "Failed to read image: %s" % key
        feats.append(convnet.transform(img))
    feats = np.asarray(feats)
    np.save(dst_path, feats)

    print("Done.")

开发者ID:norikinishida，项目名称:image-feature-extraction-via-convnet，代码行数:55，代码来源:example.py

示例6: train_model

def train_model(X_df, y_array, skf_is):
    """Fit a feature extractor and a regressor and return both.

    The extractor is fitted on, and then applied to, the whole of ``X_df``.
    Only the rows selected by the first element of ``skf_is`` are used to
    fit the regressor.

    Args:
        X_df: Input data handed to the feature extractor.
        y_array: Targets, indexable by the training indices.
        skf_is: A two-element sequence; its first element holds the
            training indices and the second is ignored.

    Returns:
        The fitted ``(FeatureExtractor, Regressor)`` pair.
    """
    extractor = FeatureExtractor()
    extractor.fit(X_df, y_array)
    features = extractor.transform(X_df)

    # Regression
    train_indices, _ignored = skf_is
    train_features = np.array([features[idx] for idx in train_indices])
    train_targets = np.array([y_array[idx] for idx in train_indices])

    regressor = Regressor()
    regressor.fit(train_features, train_targets)
    return extractor, regressor

开发者ID:xaviercallens，项目名称:OneTeam，代码行数:11，代码来源:unit+test.py

示例7: makefeatures

 def makefeatures(self, sents_list, ppindexlist):
     """
     Build one feature set per sentence.

     ARGS
         sents_list: [[s1word1,s1word2,...], [s2word1,s2word2,...],...]
         ppindexlist: one entry per sentence, paired with it by position and
             passed to FeatureExtractor as its second argument
     RETURNS
         a list with the result of FeatureExtractor(...).features() for each
         (sentence, ppindex) pair
     """
     return [
         FeatureExtractor(words, pp_position, "succ").features()
         for words, pp_position in zip(sents_list, ppindexlist)
     ]

开发者ID:tuxedocat，项目名称:ss2012，代码行数:12，代码来源:prepchecker_baseline.py

示例8: generate_seti

def generate_seti(filenames, for_test=False):
  """Build one seti per accepted row of the CSV files matching `filenames`.

  The first row of every file is treated as a header and skipped.  Rows
  flagged by `is_bad_line`, and rows for which `_to_renter_form` returns no
  form, are counted and skipped.  Per-file counts are printed.

  Args:
    filenames: Iterable of glob patterns; each is expanded with glob.glob.
    for_test: Forwarded to FeatureExtractor(for_test=...).

  Returns:
    A list with the result of FeatureExtractor.to_seti for each accepted row.

  Raises:
    Exception: If no seti was generated from any file.
  """
  files = []
  for filename in filenames:
    files.extend(glob.glob(filename))
  print('logs_to_seti reading from files: %s' % (str(files)))
  setis = []
  # Read each file where each row represents a training example.
  for fname in files:
    num_lines = 0
    num_invalid_lines = 0
    num_bad_entry_lines = 0
    # Read examples from file.
    with open(fname, 'rb') as csvfile:
      reader = csv.reader(csvfile)
      # Ignore the header.  The default keeps a completely empty file from
      # raising StopIteration.
      next(reader, None)
      for csv_line in reader:
        num_lines += 1
        bad_line, _reason = is_bad_line(csv_line)
        if bad_line:
          num_invalid_lines += 1
          continue
        renter_form, err = _to_renter_form(csv_line)
        if renter_form is None:
          print(err)
          num_bad_entry_lines += 1
          continue
        # Errors raised while converting a row are not caught here; they
        # propagate to the caller.
        fe = FeatureExtractor(for_test=for_test)
        setis.append(fe.to_seti(renter_form))
    # Finished handling file.
    print('File: %s' % fname)
    valid_lines = num_lines-num_invalid_lines-num_bad_entry_lines
    print('Num lines: %d. Valid: %d. Invalid: %d. Bady entry: %d' % (num_lines, valid_lines, num_invalid_lines, num_bad_entry_lines))

  if not setis:
    raise Exception('No setis generated!')
  return setis

开发者ID:dlluncor，项目名称:data-mining，代码行数:50，代码来源:logs_to_seti.py

示例9: extract_data

 def extract_data(self, id, extraction_method, label_type):
     """Extract the feature vector for ``id`` and, unless testing, its label.

     Args:
         id: Identifier passed to the feature extractor and label lookups.
         extraction_method: Forwarded to
             ``FeatureExtractor.extract_feature_vector``.
         label_type: ``'compiler'``, ``'optimization_level'`` or ``'test'``.

     Returns:
         ``(label, feature_vector)`` for ``'compiler'`` and
         ``'optimization_level'``; the bare ``feature_vector`` for
         ``'test'``.

     Raises:
         SystemExit: With status 1 when ``label_type`` is not recognised.
     """
     extractor = FeatureExtractor()
     feature_vector = extractor.extract_feature_vector(id, extraction_method)

     if label_type == 'compiler':
         label = self.extract_compiler_label(id)                 # for compiler estimation
     elif label_type == 'optimization_level':
         label = self.extract_optimization_level_label(id)       # for optimization level estimation
     elif label_type == 'test':
         return feature_vector                                   # for test data
     else:
         sys.stderr.write('Unknown label type specified')
         # A bare sys.exit() reports success (status 0); an unrecognised
         # label type is an error, so exit with a non-zero status.
         sys.exit(1)

     return label, feature_vector

开发者ID:t-usui，项目名称:COMES，代码行数:15，代码来源:data_processor.py

示例10: processDir

def processDir(corpusName, mailCorpus, maildir):
    """Extract and store the features of every mail found in ``maildir``.

    Each mail listed by ``mailCorpus.getFilesList(maildir)`` is run through
    ``processMail`` and ``FeatureExtractor.process``; the result is stored in
    a ``MailStorage`` for ``corpusName`` under its 1-based position, given
    as a string.  A progress display is updated after every mail.
    """
    mails = mailCorpus.getFilesList(maildir)
    storage = MailStorage(corpusName)
    extractor = FeatureExtractor()
    progress = ProgressDisplay(len(mails), 'Processing emails')

    # Output files are named 1 to numMails
    for position, mail in enumerate(mails, 1):
        processed = processMail(maildir, mail, mailCorpus)
        storage.store(extractor.process(processed), str(position))
        progress.update()

开发者ID:Ryckes，项目名称:spam-survey，代码行数:15，代码来源:example_feature_extraction.py

示例11: update_database_from_file

 def update_database_from_file(self,
                               file_name,
                               asm_file_path,
                               gdl_file_path,
                               compiler=None,
                               optimization_level=None):
     """Parse one file's listings and record the results in the database.

     Args:
         file_name: Base name of the entry.  ``'_' + compiler`` and
             ``'_' + optimization_level`` are appended, each only when it
             is not None.
         asm_file_path: Path passed to the parser's instruction,
             code-block and opcode extraction.
         gdl_file_path: Path passed to the parser's API extraction.
         compiler: Optional compiler name; when given it is also stored in
             the compiler_information table.
         optimization_level: Optional optimization level; when given it is
             also stored in the optimization_level_information table.
     """
     # Both values default to None, so append only those that were supplied;
     # concatenating None to a string would raise TypeError.
     for tag in (compiler, optimization_level):
         if tag is not None:
             file_name += '_' + tag

     parser = IDAFileParser()
     extractor = FeatureExtractor()
     db_constructor = DatabaseConstructor()

     # Update file_name table
     db_constructor.insert_file_name(file_name)

     # Update instruction_sequence table
     instruction_list = parser.extract_instruction(asm_file_path)
     db_constructor.insert_instruction_sequence(file_name, instruction_list)

     # Update instruction_code_block table
     code_block_list = parser.extract_code_block(asm_file_path)
     db_constructor.insert_code_block(file_name, code_block_list)

     # Update opcode_variety table
     opcode_list = parser.extract_opcode(asm_file_path)
     db_constructor.append_opcode_variety(opcode_list)

     # Update bigram_variety table
     bigram_list = extractor.extract_ngram_list(opcode_list, 2)
     db_constructor.append_bigram_variety(bigram_list)

     # Update trigram_variety table
     trigram_list = extractor.extract_ngram_list(opcode_list, 3)
     db_constructor.append_trigram_variety(trigram_list)

     # Update api table
     api_list = parser.extract_api(gdl_file_path)
     db_constructor.insert_api(file_name, api_list)

     # Update api_variety table
     db_constructor.append_api_variety(api_list)

     if compiler is not None:
         # Update compiler_information table
         db_constructor.insert_compiler_information(file_name, compiler)
     if optimization_level is not None:
         # Update optimization_level_information table
         db_constructor.insert_optimization_level_information(file_name, optimization_level)

开发者ID:t-usui，项目名称:COMES，代码行数:48，代码来源:data_processor.py

示例12: __init__

 def __init__(self, movie_dict=None, act_set=None, slot_set=None, db=None, corpus=None,
         train=True, _reload=False, n_hid=100, batch=128, ment=0., inputtype='full', upd=10, 
         sl='e2e', rl='e2e', pol_start=600, lr=0.005, N=1, tr=2.0, ts=0.5, max_req=2, frac=0.5, 
         name=None):
     """Set up the agent: feature extractor, model, experience pool, counters.

     Although most arguments default to None, this constructor reads
     ``db.path`` and ``movie_dict.lengths``, so ``db`` and ``movie_dict``
     must be real objects.  ``name`` is concatenated to
     ``dialog_config.MODEL_PATH`` whenever ``_reload`` or ``train`` is true.

     Args:
         movie_dict: Stored as ``self.movie_dict``; its ``lengths`` mapping
             is indexed by each entry of ``dialog_config.inform_slots``.
         act_set: Stored as ``self.act_set``.
         slot_set: Stored as ``self.slot_set``.
         db: Stored as ``self.database``; its ``path`` is passed to
             ``FeatureExtractor`` and the object itself to ``_init_model``.
         corpus: First argument to ``FeatureExtractor``.
         train: Stored as ``self.training``; when true the model is saved
             during construction.
         _reload: When true the model is loaded during construction.
         n_hid, ment, inputtype, sl, rl: Forwarded to ``_init_model``.
         batch: Forwarded to ``_init_model`` as ``batch_size`` and to
             ``_init_experience_pool``.
         lr: Forwarded to ``_init_model`` as ``learning_rate_sl``.
         N: Forwarded to ``FeatureExtractor`` as ``N``.
         upd, pol_start, tr, ts, max_req, frac: Stored on the instance
             under the same names; not otherwise used here.
         name: Stored as ``self._name`` and used to build the model path.
     """
     self.movie_dict = movie_dict
     self.act_set = act_set
     self.slot_set = slot_set
     self.database = db
     self.max_turn = dialog_config.MAX_TURN
     self.training = train
     # Requires ``db`` to be set: ``self.database.path`` is read here.
     self.feat_extractor = FeatureExtractor(corpus,self.database.path,N=N)
     # Model sizes are derived from the configured inform slots and from the
     # extractor's ``grams``.
     out_size = len(dialog_config.inform_slots)+1
     in_size = len(self.feat_extractor.grams) + len(dialog_config.inform_slots)
     slot_sizes = [self.movie_dict.lengths[s] for s in dialog_config.inform_slots]
     self._init_model(in_size, out_size, slot_sizes, self.database, \
             n_hid=n_hid, learning_rate_sl=lr, batch_size=batch, ment=ment, inputtype=inputtype, \
             sl=sl, rl=rl)
     self._name = name
     # Load first, then save: with both flags set, the model is loaded and
     # then written back to the same path.
     if _reload: self.load_model(dialog_config.MODEL_PATH+self._name)
     if train: self.save_model(dialog_config.MODEL_PATH+self._name)
     self._init_experience_pool(batch)
     self.episode_count = 0
     # Bounded histories: each deque keeps only its most recent entries.
     self.recent_rewards = deque([], 1000)
     self.recent_successes = deque([], 1000)
     self.recent_turns = deque([], 1000)
     self.recent_loss = deque([], 10)
     self.discount = 0.99
     self.num_updates = 0
     self.pol_start = pol_start
     self.tr = tr
     self.ts = ts
     self.max_req = max_req
     self.frac = frac
     self.upd = upd

开发者ID:SoluMilken，项目名称:KB-InfoBot，代码行数:34，代码来源:agent_e2eRL_allact.py

示例13: TestFeatureExtractor

class TestFeatureExtractor(unittest.TestCase):
    '''
    Unit tests for the FeatureExtractor class. Simple checks to ensure that
    the feature vector we get back has the right length and holds frequency
    data that makes sense. More tests should be added.
    '''
    def setUp(self):
        '''Builds the record, sequence and feature vector used by the tests'''
        protein = Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
                      IUPAC.protein)
        self.record1 = SeqRecord(
            protein,
            id="YP_025292.1",
            name="HokC",
            description="toxic membrane protein, small",
        )
        self.seq1 = self.record1.seq
        self.feature_extractor = FeatureExtractor()
        self.feature_vector1 = self.feature_extractor.extract_features(self.seq1)

    def test_feature_vector_length(self):
        '''Checks that the feature vector has 400 elements'''
        actual_length = len(self.feature_vector1)
        self.assertEqual(actual_length, 400, msg="Feature vector not 400 long")

    def test_dipeptide_frequency_sum(self):
        '''Checks that the first 400 entries of the feature vector sum to 1'''
        # Start from 0.0 and add in index order, element by element.
        checksum = sum((self.feature_vector1[i] for i in range(400)), 0.0)
        self.assertAlmostEqual(checksum, 1.0, places=5, msg="Frequencies don't sum to 1")

开发者ID:jrlawson，项目名称:profunc，代码行数:26，代码来源:test_feature_extractor.py

示例14: train_model

    def train_model(self, model_out_file):
        """
        Extract the features from self.data, train the classifier and pickle
        it to model_out_file.

        Labels are taken from self.data["is_romantic"]: "Romantic" maps to 0
        and every other value to 1. The classifier is a LogisticRegression
        when self.classifier_type is "logit", otherwise a
        DecisionTreeClassifier.

        :param model_out_file: Path the pickled classifier is written to
        :return: None
        :raises Exception: if self.data is None
        """
        if self.data is None:
            raise Exception("Trying to train model without any data.")

        sys.stderr.write("Extracting features from data.\n")

        self.featureExtractor = FeatureExtractor(self.data)
        feature_matrix = self.featureExtractor.extract_full_feature_matrix()

        labels = np.array([0 if lab == "Romantic" else 1 for lab in self.data["is_romantic"]])

        sys.stderr.write("Training classifier.\n")

        if self.classifier_type == "logit":
            self.classifier = LogisticRegression()
        else:
            self.classifier = DecisionTreeClassifier()
        self.classifier.fit(feature_matrix, labels)

        sys.stderr.write("Saving classifier.\n")

        # Pickle output is bytes: the file must be opened in binary mode.
        # Text mode fails on Python 3 and can corrupt the data on platforms
        # that translate newlines.
        with open(model_out_file, "wb") as f:
            pickle.dump(self.classifier, f)

开发者ID:linii，项目名称:ling229-final，代码行数:25，代码来源:classify_relationship_posts.py

示例15: predict_model

    def predict_model(self, model_file=None, output_file=None, output_probability_file=None):
        """
        Predict classes on self.data and optionally write them to disk.

        :param model_file: Pickled model to load when self.classifier is not set
        :param output_file: File to save predictions in (integers, one per row)
        :param output_probability_file: File to save predicted probabilities in
            (three decimal places, comma-separated)
        :return: predicted classes (array), also stored in self.predictions
        :raises Exception: if there is neither a classifier nor a model_file,
            or if self.data is None
        """
        if not self.classifier:
            if not model_file:
                raise Exception("No model to predict with.")
            # Pickles are binary data, so read in binary mode; text mode
            # fails on Python 3.
            # NOTE(review): unpickling can execute arbitrary code -- only
            # load model files from a trusted source.
            with open(model_file, "rb") as f:
                self.classifier = pickle.load(f)

        if self.data is None:
            raise Exception("Trying to predict using model with no data loaded.")

        self.featureExtractor = FeatureExtractor(self.data)
        feature_matrix = self.featureExtractor.extract_full_feature_matrix()

        self.predictions = self.classifier.predict(feature_matrix)

        if output_file is not None:
            np.savetxt(output_file, self.predictions, delimiter=",", fmt="%d")

        if output_probability_file is not None:
            pred_probs = self.classifier.predict_proba(feature_matrix)
            np.savetxt(output_probability_file, pred_probs, delimiter=",", fmt="%.3f")

        return self.predictions

开发者ID:linii，项目名称:ling229-final，代码行数:31，代码来源:classify_relationship_posts.py

注：本文中的feature_extractor.FeatureExtractor类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

示例1: __init__