This article collects typical usage examples of the Python class feature_extractor.FeatureExtractor. If you have been struggling with questions like: what exactly does the Python FeatureExtractor class do? How is it used? Where can I find examples? Then you are in luck: the hand-picked class code examples here may help.
Shown below are 15 code examples of the FeatureExtractor class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: __init__
def __init__(self, texts=None, n=16, step_size=1, k=100, kmeans_args=None):
    self.n = n
    self.step_size = step_size
    self.k = k
    self.kmeans = None
    self.kmeans_args = kmeans_args
    FeatureExtractor.__init__(self)
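This __init__ evidently belongs to a FeatureExtractor subclass whose name the snippet does not show. A minimal instantiation sketch, assuming a hypothetical subclass name NgramExtractor carrying the initializer above; the kmeans_args value is an assumption modeled on scikit-learn's KMeans keywords:

    # NgramExtractor is a made-up name for the subclass defining the __init__ above.
    extractor = NgramExtractor(texts=corpus_texts, n=16, step_size=1, k=100,
                               kmeans_args={"n_init": 10})
    # extractor.kmeans stays None until some later fitting step (not shown) sets it.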
Example 2: main
import os

import cv2
import pyprind

from feature_extractor import FeatureExtractor

def main():
    dataset_path = "/path/to/Caltech-101"
    modelzoo_path = "/path/to/VGG16"

    # create an instance
    convnet = FeatureExtractor(
        prototxt_path=os.path.join(modelzoo_path, "vgg16_deploy.prototxt"),
        caffemodel_path=os.path.join(modelzoo_path, "vgg16.caffemodel"),
        target_layer_name="fc7",
        image_size=224,
        mean_values=[103.939, 116.779, 123.68])

    # write the CSV header
    f = open("caltech101_vggnet_fc7_features.csv", "w")
    header = ["filepath"]
    for i in xrange(4096):
        header.append("feat%d" % (i+1))
    header = ",".join(header) + "\n"
    f.write(header)

    # extract features, one row per image
    categories = os.listdir(dataset_path)
    for category in pyprind.prog_bar(categories):
        file_names = os.listdir(os.path.join(dataset_path, category))
        for file_name in file_names:
            img = cv2.imread(os.path.join(dataset_path, category, file_name))
            feat = convnet.transform(img)
            feat_str = [os.path.join(category, file_name)]
            for value in feat:
                feat_str.append(str(value))
            row = ",".join(feat_str)
            f.write("%s\n" % row)
            f.flush()
    f.close()
Developer: norikinishida, Project: image-feature-extraction-via-convnet, Lines: 35, Source: extract_features_caltech101.py
Example 3: __init__
def __init__(self, prefix='_p_', min_df=1, max_per=1.0, binarize=False, transform=None, replace_num='#',
             source=None, subdir=None, pseudotype=None, splits_file=None, stage='training', suffix='',
             lower=True, scale_factor=None):
    name = 'pkl'
    assert transform != 'tfidf'
    FeatureExtractor.__init__(self, name=name, prefix=prefix, min_df=min_df, max_per=max_per, binarize=binarize,
                              transform=transform, replace_num=replace_num, source=source, subdir=subdir,
                              pseudotype=pseudotype, splits_file=splits_file, stage=stage, suffix=suffix,
                              lower=lower, scale_factor=scale_factor)
Example 4: __init__
def __init__(self, mode, data_type, log_csv_path, feature_path, label_path, debug_limit):
    FeatureExtractor.__init__(self, mode, data_type, log_csv_path, feature_path, debug_limit)
    labels = {}
    with open(label_path, 'r') as r:
        for line in r:
            eid, dropout = line.strip().split(',')
            if str.isdigit(eid):
                labels[int(eid)] = int(dropout)
    self.labels = labels
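Each line of the label file is expected to be a comma-separated pair of enrollment id and dropout flag; rows whose id is not numeric, such as a header line, are skipped by the isdigit check. For example (column names are an assumption):

    enrollment_id,dropout
    1,0
    2,1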
Example 5: main
import os
import sys

import cv2
import numpy as np

from feature_extractor import FeatureExtractor

def main():
    caffe_alexnet_path = "/path/to/caffe-modelzoo/AlexNet"
    caffe_vgg16_path = "/path/to/caffe-modelzoo/VGG16"
    caffe_googlenet_path = "/path/to/caffe-modelzoo/GoogleNet"
    keys_path = "/path/to/dataset/keys.txt"
    data_path = "/path/to/dataset/images"
    dst_path = "/path/to/dataset/features.npy"
    modelname = "VGG16"

    # load pre-trained model
    if modelname == "AlexNet":
        if not os.path.exists(os.path.join(caffe_alexnet_path, "imagenet_mean.npy")):
            convert_mean_file(caffe_alexnet_path)
        convnet = FeatureExtractor(
            prototxt_path=os.path.join(caffe_alexnet_path, "alexnet_deploy.prototxt"),
            caffemodel_path=os.path.join(caffe_alexnet_path, "alexnet.caffemodel"),
            target_layer_name="fc6",
            image_size=227,
            mean_path=os.path.join(caffe_alexnet_path, "imagenet_mean.npy")
        )
    elif modelname == "VGG16":
        convnet = FeatureExtractor(
            prototxt_path=os.path.join(caffe_vgg16_path, "vgg16_deploy.prototxt"),
            caffemodel_path=os.path.join(caffe_vgg16_path, "vgg16.caffemodel"),
            target_layer_name="fc6",
            image_size=224,
            mean_values=[103.939, 116.779, 123.68]
        )
    elif modelname == "GoogleNet":
        # was `googlenet = ...` in the original, which would break the
        # `convnet.transform(img)` call below; renamed for consistency
        convnet = FeatureExtractor(
            prototxt_path=os.path.join(caffe_googlenet_path, "googlenet_deploy.prototxt"),
            caffemodel_path=os.path.join(caffe_googlenet_path, "googlenet.caffemodel"),
            target_layer_name="pool5/7x7_s1",
            image_size=224,
            mean_values=[104.0, 117.0, 123.0]
        )
    else:
        print "Unknown model name: %s" % modelname
        sys.exit(-1)

    # data list
    keys = load_keys(keys_path)

    # feature extraction
    feats = []
    for key in keys:
        img = cv2.imread(os.path.join(data_path, key))
        assert img is not None
        feat = convnet.transform(img)
        feats.append(feat)
    feats = np.asarray(feats)
    np.save(dst_path, feats)
    print "Done."
Example 6: train_model
def train_model(X_df, y_array, skf_is):
    fe = FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)
    # Regression
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
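Because train_model returns the fitted extractor along with the regressor, the same transform can be reused on a held-out fold. A hedged usage sketch, assuming X_df is a DataFrame, y_array a NumPy array, and that FeatureExtractor and Regressor are importable from the surrounding submission modules:

    import numpy as np

    # Build one (train_is, test_is) pair in the shape train_model expects.
    indices = np.random.permutation(len(y_array))
    cut = int(0.8 * len(y_array))
    skf_is = (indices[:cut], indices[cut:])

    fe, reg = train_model(X_df, y_array, skf_is)

    # Score the held-out fold, mirroring the indexing used inside train_model.
    X_array = fe.transform(X_df)
    y_pred = reg.predict(np.array([X_array[i] for i in skf_is[1]]))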
Example 7: makefeatures
def makefeatures(self, sents_list, ppindexlist):
    """
    ARGS
        sents_list: [[s1word1,s1word2,...], [s2word1,s2word2,...],...]
        ppindexlist: one pp index per sentence, aligned with sents_list
    RETURNS
        _features: a list of feature sets (dicts)
    """
    _features = []
    for sent, ppindex in zip(sents_list, ppindexlist):
        fe = FeatureExtractor(sent, ppindex, "succ")
        _features.append(fe.features())
    return _features
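A call shaped like the docstring, purely illustrative (what the "succ" flag selects is not shown in this snippet):

    feats = self.makefeatures([["he", "ate", "with", "a", "fork"]], [2])
    # -> one feature dict per sentence, built around the pp index 2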
Example 8: generate_seti
def generate_seti(filenames, for_test=False):
    files = []
    for filename in filenames:
        for fname in glob.glob(filename):
            files.append(fname)
    print 'logs_to_seti reading from files: %s' % (str(files))
    setis = []
    # Read each file where each row represents a training example.
    for fname in files:
        num_lines = 0
        num_invalid_lines = 0
        num_bad_entry_lines = 0
        bad_entry_lines = []
        # Read examples from file.
        with open(fname, 'rb') as csvfile:
            reader = csv.reader(csvfile)
            reader.next()  # ignore header
            i = 0
            invalid_lines = []
            for csv_line in reader:
                num_lines += 1
                bad_line, reason = is_bad_line(csv_line)
                if bad_line:
                    num_invalid_lines += 1
                    continue
                #try:
                renter_form, err = _to_renter_form(csv_line)
                if renter_form is None:
                    print err
                    num_bad_entry_lines += 1
                    bad_entry_lines.append(csv_line)
                    continue
                fe = FeatureExtractor(for_test=for_test)
                seti = fe.to_seti(renter_form)
                setis.append(seti)
                #except Exception as e:
                #    num_invalid_lines += 1
                #    invalid_lines.append(i)
                #    print 'e: %s' % (str(e))
                #    PrintException()
                #    print 'Could not parse line %d. %d cols. \n%s' % (i, len(csv_line), csv_line)
                i += 1
        # Finished handling file.
        print 'File: %s' % fname
        valid_lines = num_lines - num_invalid_lines - num_bad_entry_lines
        print 'Num lines: %d. Valid: %d. Invalid: %d. Bad entry: %d' % (num_lines, valid_lines, num_invalid_lines, num_bad_entry_lines)
    if len(setis) == 0:
        raise Exception('No setis generated!')
    return setis
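Since each entry in filenames is expanded with glob, callers can pass shell-style patterns directly; a usage sketch with an illustrative path:

    setis = generate_seti(['logs/renter_forms_*.csv'], for_test=False)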
Example 9: extract_data
def extract_data(self, id, extraction_method, label_type):
    extractor = FeatureExtractor()
    feature_vector = extractor.extract_feature_vector(id, extraction_method)
    if label_type == 'compiler':
        label = self.extract_compiler_label(id)  # for compiler estimation
    elif label_type == 'optimization_level':
        label = self.extract_optimization_level_label(id)  # for optimization level estimation
    elif label_type == 'test':
        return feature_vector  # for test data
    else:
        sys.stderr.write('Unknown label type specified')
        sys.exit()
    return label, feature_vector
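Note the asymmetric return value: for label_type 'test' the method returns only the feature vector, while 'compiler' and 'optimization_level' return a (label, feature_vector) pair, so callers must unpack accordingly. A sketch with an illustrative extraction method name (the methods this extractor actually supports are not shown here):

    label, vec = self.extract_data(file_id, 'opcode_ngram', 'compiler')
    vec_only = self.extract_data(file_id, 'opcode_ngram', 'test')  # no label for test data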
Example 10: processDir
def processDir(corpusName, mailCorpus, maildir):
    mailIterator = mailCorpus.getFilesList(maildir)
    mailStorage = MailStorage(corpusName)
    featureExtractor = FeatureExtractor()
    progress = ProgressDisplay(len(mailIterator), 'Processing emails')
    # Output files are named 1 to numMails
    index = 1
    for mail in mailIterator:
        processed = processMail(maildir, mail, mailCorpus)
        features = featureExtractor.process(processed)
        mailStorage.store(features, str(index))
        index += 1
        progress.update()
Example 11: update_database_from_file
def update_database_from_file(self,
                              file_name,
                              asm_file_path,
                              gdl_file_path,
                              compiler=None,
                              optimization_level=None):
    # Note: this concatenation assumes compiler and optimization_level are
    # both given; the defaults of None would raise a TypeError here.
    file_name += '_' + compiler + '_' + optimization_level
    parser = IDAFileParser()
    extractor = FeatureExtractor()
    db_constructor = DatabaseConstructor()
    # Update file_name table
    db_constructor.insert_file_name(file_name)
    # Update instruction_sequence table
    instruction_list = parser.extract_instruction(asm_file_path)
    db_constructor.insert_instruction_sequence(file_name, instruction_list)
    # Update instruction_code_block table
    code_block_list = parser.extract_code_block(asm_file_path)
    db_constructor.insert_code_block(file_name, code_block_list)
    # Update opcode_variety table
    opcode_list = parser.extract_opcode(asm_file_path)
    db_constructor.append_opcode_variety(opcode_list)
    # Update bigram_variety table
    bigram_list = extractor.extract_ngram_list(opcode_list, 2)
    db_constructor.append_bigram_variety(bigram_list)
    # Update trigram_variety table
    trigram_list = extractor.extract_ngram_list(opcode_list, 3)
    db_constructor.append_trigram_variety(trigram_list)
    # Update api table
    api_list = parser.extract_api(gdl_file_path)
    db_constructor.insert_api(file_name, api_list)
    # Update api_variety table
    db_constructor.append_api_variety(api_list)
    if compiler is not None:
        # Update compiler_information table
        db_constructor.insert_compiler_information(file_name, compiler)
    if optimization_level is not None:
        # Update optimization_level_information table
        db_constructor.insert_optimization_level_information(file_name, optimization_level)
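A hedged invocation sketch; the object exposing this method is not shown in the snippet, so the receiver name updater and the paths are illustrative:

    updater.update_database_from_file('hello', 'asm/hello.asm', 'gdl/hello.gdl',
                                      compiler='gcc', optimization_level='O2')
    # file_name becomes 'hello_gcc_O2' in every table updated above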
Example 12: __init__
def __init__(self, movie_dict=None, act_set=None, slot_set=None, db=None, corpus=None,
             train=True, _reload=False, n_hid=100, batch=128, ment=0., inputtype='full', upd=10,
             sl='e2e', rl='e2e', pol_start=600, lr=0.005, N=1, tr=2.0, ts=0.5, max_req=2, frac=0.5,
             name=None):
    self.movie_dict = movie_dict
    self.act_set = act_set
    self.slot_set = slot_set
    self.database = db
    self.max_turn = dialog_config.MAX_TURN
    self.training = train
    self.feat_extractor = FeatureExtractor(corpus, self.database.path, N=N)
    out_size = len(dialog_config.inform_slots) + 1
    in_size = len(self.feat_extractor.grams) + len(dialog_config.inform_slots)
    slot_sizes = [self.movie_dict.lengths[s] for s in dialog_config.inform_slots]
    self._init_model(in_size, out_size, slot_sizes, self.database,
                     n_hid=n_hid, learning_rate_sl=lr, batch_size=batch, ment=ment,
                     inputtype=inputtype, sl=sl, rl=rl)
    self._name = name
    if _reload: self.load_model(dialog_config.MODEL_PATH + self._name)
    if train: self.save_model(dialog_config.MODEL_PATH + self._name)
    self._init_experience_pool(batch)
    self.episode_count = 0
    self.recent_rewards = deque([], 1000)
    self.recent_successes = deque([], 1000)
    self.recent_turns = deque([], 1000)
    self.recent_loss = deque([], 10)
    self.discount = 0.99
    self.num_updates = 0
    self.pol_start = pol_start
    self.tr = tr
    self.ts = ts
    self.max_req = max_req
    self.frac = frac
    self.upd = upd
Example 13: TestFeatureExtractor
class TestFeatureExtractor(unittest.TestCase):
    '''
    Unit tests for the FeatureExtractor class. Does simple tests to ensure that
    the feature vector we get back is of the right length and has frequency
    data that makes sense. More tests should be added.
    '''

    def setUp(self):
        '''Sets up the test by constructing feature vectors to get tested'''
        self.record1 = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
                                     IUPAC.protein),
                                 id="YP_025292.1", name="HokC",
                                 description="toxic membrane protein, small")
        self.seq1 = self.record1.seq
        self.feature_extractor = FeatureExtractor()
        self.feature_vector1 = self.feature_extractor.extract_features(self.seq1)

    def test_feature_vector_length(self):
        '''Tests that the feature vector is 400 elements long'''
        self.assertEqual(len(self.feature_vector1), 400, msg="Feature vector not 400 long")

    def test_dipeptide_frequency_sum(self):
        '''Tests that the dipeptide frequencies sum to 1'''
        checksum = 0.0
        for i in range(0, 400):
            checksum += self.feature_vector1[i]
        self.assertAlmostEqual(checksum, 1.0, places=5, msg="Frequencies don't sum to 1")
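To run these tests directly, the standard unittest entry point is enough. A sketch of the imports the test body implies; note that Bio.Alphabet.IUPAC exists only in older Biopython releases, so that import is an assumption about the project's environment:

    import unittest
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.Alphabet import IUPAC  # removed in Biopython >= 1.78
    from feature_extractor import FeatureExtractor  # module path assumed

    if __name__ == '__main__':
        unittest.main()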
Example 14: train_model
def train_model(self, model_out_file):
    """
    Extract the features from self.data and train the classifier. Output a pickled model to model_out_file.
    :param model_out_file: path to write the pickled classifier to
    :return: None
    """
    if self.data is None:
        raise Exception("Trying to train model without any data.")
    sys.stderr.write("Extracting features from data.\n")
    self.featureExtractor = FeatureExtractor(self.data)
    feature_matrix = self.featureExtractor.extract_full_feature_matrix()
    labels = np.array([0 if lab == "Romantic" else 1 for lab in self.data["is_romantic"]])
    sys.stderr.write("Training classifier.\n")
    self.classifier = LogisticRegression() if self.classifier_type == "logit" else DecisionTreeClassifier()
    self.classifier.fit(feature_matrix, labels)
    sys.stderr.write("Saving classifier.\n")
    with open(model_out_file, "w") as f:
        pickle.dump(self.classifier, f)
def predict_model(self, model_file=None, output_file=None, output_probability_file=None):
"""
Predict classes on self.data and output to output_file
:param model_file: Model file to read model in from. Otherwise looks for self.classifier
:param output_file: File to save predictions in
:param output_probability_file: File to save predicted probabilities in
:return: predicted classes (array)
"""
if not self.classifier:
if not model_file:
raise Exception("No model to predict with.")
else:
with open(model_file) as f:
self.classifier = pickle.load(f)
if self.data is None:
raise Exception("Trying to predict using model with no data loaded.")
self.featureExtractor = FeatureExtractor(self.data)
feature_matrix = self.featureExtractor.extract_full_feature_matrix()
self.predictions = self.classifier.predict(feature_matrix)
if output_file is not None:
np.savetxt(output_file, self.predictions, delimiter=",", fmt="%d")
if output_probability_file is not None:
pred_probs = self.classifier.predict_proba(feature_matrix)
np.savetxt(output_probability_file, pred_probs, delimiter=",", fmt="%.3f")
return self.predictions
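Taken together, Examples 14 and 15 suggest a train-then-predict workflow. A sketch assuming a hypothetical enclosing class RomanceClassifier; only the self.data and self.classifier_type attributes are actually evidenced by the snippets above:

    # Hypothetical constructor signature.
    trainer = RomanceClassifier(data=train_df, classifier_type="logit")
    trainer.train_model("model.pkl")

    scorer = RomanceClassifier(data=test_df, classifier_type="logit")
    preds = scorer.predict_model(model_file="model.pkl",
                                 output_file="preds.csv",
                                 output_probability_file="probs.csv")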