This article collects typical usage examples of the Python method utils.preprocess. If you are wondering what utils.preprocess does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples of the utils module that provides this method.
The 12 code examples of utils.preprocess shown below are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
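Before diving into the examples, here is a minimal sketch of the shared call pattern; the input value is illustrative, and the exact signature and return type of preprocess differ between the projects sampled below.
import utils

cleaned = utils.preprocess("raw input text")  # hypothetical input; see the project-specific examples below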
Example 1: decode
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def decode(self, words, lower=False):
""" Return the given words with their part-of-speech tags.
args:
- words (list): Input words.
- lower (bool, optional): If True, all uppercase characters in the \
given words are converted to lowercase.
return:
- object: The words with their tags.
"""
if not isinstance(words, list):
raise AssertionError("Please input a list of words.")
# Keep trailing whitespace for space tokens; the second comparison below is a full-width (U+3000) space.
words = [utils.preprocess_without_rstrip(w) if w == " " or w == "　"
else utils.preprocess(w) for w in words]
postags = self._postagging(words, lower)
return postags
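A hedged usage sketch for the decode method above, assuming it belongs to a nagisa-style Tagger instance (the data files in Example 3 suggest so); the sample words are illustrative.
import nagisa  # assumed package providing the Tagger shown in Example 3

tagger = nagisa.Tagger()
# decode expects an already segmented list of words and returns their tags
postags = tagger.decode(['Python', 'で', '解析'])
print(postags)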
Example 2: deep_dream
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def deep_dream(image, model, iterations, lr, octave_scale, num_octaves):
""" Main deep dream method """
image = preprocess(image).unsqueeze(0).cpu().data.numpy()
# Extract image representations for each octave
octaves = [image]
for _ in range(num_octaves - 1):
octaves.append(nd.zoom(octaves[-1], (1, 1, 1 / octave_scale, 1 / octave_scale), order=1))
detail = np.zeros_like(octaves[-1])
for octave, octave_base in enumerate(tqdm.tqdm(octaves[::-1], desc="Dreaming")):
if octave > 0:
# Upsample detail to new octave dimension
detail = nd.zoom(detail, np.array(octave_base.shape) / np.array(detail.shape), order=1)
# Add deep dream detail from previous octave to new base
input_image = octave_base + detail
# Get new deep dream image
dreamed_image = dream(input_image, model, iterations, lr)
# Extract deep dream details
detail = dreamed_image - octave_base
return deprocess(dreamed_image)
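A hedged call sketch for the deep_dream example; the truncated VGG19 feature extractor and the hyper-parameter values are illustrative assumptions, and dream/deprocess are expected to come from the same repository as the snippet above.
import torch.nn as nn
from PIL import Image
from torchvision import models

# Illustrative setup: use part of VGG19's feature stack as the dreaming model.
vgg = models.vgg19(pretrained=True)
model = nn.Sequential(*list(vgg.features.children())[:28])

image = Image.open("input.jpg")  # hypothetical input path
dreamed = deep_dream(image, model, iterations=20, lr=0.01, octave_scale=1.4, num_octaves=10)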
Example 3: __init__
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def __init__(self, vocabs=None, params=None, hp=None, single_word_list=None):
if vocabs is None:
vocabs = base + '/data/nagisa_v001.dict'
if params is None:
params = base + '/data/nagisa_v001.model'
if hp is None:
hp = base + '/data/nagisa_v001.hp'
# Load vocabulary files
vocabs = utils.load_data(vocabs)
self._uni2id, self._bi2id, self._word2id, self._pos2id, self._word2postags = vocabs
self._id2pos = {v:k for k, v in self._pos2id.items()}
self.id2pos = self._id2pos
self.postags = [postag for postag in self._pos2id.keys()]
# Load a hyper-parameter file
self._hp = utils.load_data(hp)
# Construct a word segmentation model and a pos tagging model
self._model = model.Model(self._hp, params)
# If a word is included in single_word_list,
# it is always segmented as a single word.
self.pattern = None
if single_word_list:
single_word_list = [utils.preprocess(w) for w in single_word_list if len(w) > 1]
# Escape regex metacharacters before joining the words into an alternation pattern.
single_word_list = [re.escape(w) for w in single_word_list]
single_word_list = sorted(single_word_list, key=lambda x:-len(x))
if len(single_word_list) > 0:
self.pattern = re.compile('|'.join(single_word_list))
# If use_noun_heuristic is True, nouns are more likely to appear.
if u'名詞' in self._pos2id:
self.use_noun_heuristic = True
else:
self.use_noun_heuristic = False
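The single_word_list branch above can be exercised by constructing the tagger with a custom entry; a hedged sketch in which the package name and the sample word are assumptions based on the data file names.
import nagisa  # assumed package; see the constructor above

# Words in single_word_list are forced to be kept as single tokens.
custom_tagger = nagisa.Tagger(single_word_list=['ニューラルネットワーク'])
print(custom_tagger.wakati('ニューラルネットワークを使う'))
# e.g. ['ニューラルネットワーク', 'を', '使う']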
Example 4: telemetry
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def telemetry(sid, data):
if data:
# The current steering angle of the car
steering_angle = float(data["steering_angle"])
# The current throttle of the car
throttle = float(data["throttle"])
# The current speed of the car
speed = float(data["speed"])
# The current image from the center camera of the car
image = Image.open(BytesIO(base64.b64decode(data["image"])))
# save frame
if args.image_folder != '':
timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
image_filename = os.path.join(args.image_folder, timestamp)
image.save('{}.jpg'.format(image_filename))
try:
image = np.asarray(image) # from PIL image to numpy array
image = utils.preprocess(image) # apply the preprocessing
image = np.array([image]) # the model expects 4D array
# predict the steering angle for the image
steering_angle = float(model.predict(image, batch_size=1))
# lower the throttle as the speed increases
# If the speed is above the current speed limit, we are probably going
# downhill: slow down first, then return to the original maximum speed.
global speed_limit
if speed > speed_limit:
speed_limit = MIN_SPEED # slow down
else:
speed_limit = MAX_SPEED
throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2
print('{} {} {}'.format(steering_angle, throttle, speed))
send_control(steering_angle, throttle)
except Exception as e:
print(e)
else:
# NOTE: DON'T EDIT THIS.
sio.emit('manual', data={}, skip_sid=True)
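A quick sanity check of the throttle formula above, with illustrative numbers: at steering_angle = 0.1, speed = 15 and speed_limit = 25, throttle = 1.0 - 0.01 - 0.36 = 0.63, so the car keeps accelerating; the same steering angle at speed = 24 gives roughly 1.0 - 0.01 - 0.92 ≈ 0.07, so the car nearly coasts until it drops back below the limit.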
Example 5: std
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def std(image):
return utils.preprocess.per_image_standardization(image)
Example 6: detect
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def detect(sess, model, names, image, path):
preprocess = eval(args.preprocess)
_, height, width, _ = image.get_shape().as_list()
_image = read_image(path)
image_original = np.array(np.uint8(_image))
if len(image_original.shape) == 2:
image_original = np.repeat(np.expand_dims(image_original, -1), 3, 2)
image_height, image_width, _ = image_original.shape
image_std = preprocess(np.array(np.uint8(_image.resize((width, height)))).astype(np.float32))
feed_dict = {image: np.expand_dims(image_std, 0)}
tensors = [model.conf, model.xy_min, model.xy_max]
conf, xy_min, xy_max = sess.run([tf.check_numerics(t, t.op.name) for t in tensors], feed_dict=feed_dict)
boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0], args.threshold, args.threshold_iou)
scale = [image_width / model.cell_width, image_height / model.cell_height]
fig = plt.figure()
ax = fig.gca()
ax.imshow(image_original)
colors = [prop['color'] for _, prop in zip(names, itertools.cycle(plt.rcParams['axes.prop_cycle']))]
cnt = 0
for _conf, _xy_min, _xy_max in boxes:
index = np.argmax(_conf)
if _conf[index] > args.threshold:
wh = _xy_max - _xy_min
_xy_min = _xy_min * scale
_wh = wh * scale
linewidth = min(_conf[index] * 10, 3)
ax.add_patch(patches.Rectangle(_xy_min, _wh[0], _wh[1], linewidth=linewidth, edgecolor=colors[index], facecolor='none'))
ax.annotate(names[index] + ' (%.1f%%)' % (_conf[index] * 100), _xy_min, color=colors[index])
cnt += 1
fig.canvas.set_window_title('%d objects detected' % cnt)
ax.set_xticks([])
ax.set_yticks([])
return fig
Example 7: make_args
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def make_args():
parser = argparse.ArgumentParser()
parser.add_argument('path', help='input image path')
parser.add_argument('-c', '--config', nargs='+', default=['config.ini'], help='config file')
parser.add_argument('-p', '--preprocess', default='std', help='the preprocess function')
parser.add_argument('-t', '--threshold', type=float, default=0.3)
parser.add_argument('--threshold_iou', type=float, default=0.4, help='IoU threshold')
parser.add_argument('-e', '--exts', nargs='+', default=['.jpg', '.png'])
parser.add_argument('--level', default='info', help='logging level')
return parser.parse_args()
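With this parser, a run might look like python detect.py path/to/image.jpg -c config.ini -p std -t 0.3 (the script name is hypothetical); the value passed to -p is resolved to a preprocessing function such as std from Example 5 via eval(args.preprocess) in Example 6.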
Example 8: main
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def main(args):
# Get the configuration file
config = utils.import_file(os.path.join(args.model_dir, 'config.py'), 'config')
# Get the paths of the aligned images
with open(args.image_list) as f:
paths = [line.strip() for line in f]
print('%d images to load.' % len(paths))
assert(len(paths)>0)
# Pre-process the images
images = utils.preprocess(paths, config, False)
switch = np.array([utils.is_typeB(p) for p in paths])
print('%d type A images and %d type B images.' % (np.sum(~switch), np.sum(switch)))  # switch marks type B images
# Load model files and config file
if config.use_sibling:
network = SiblingNetwork()
else:
network = BaseNetwork()
network.load_model(args.model_dir)
# Run forward pass to calculate embeddings
if config.use_sibling:
embeddings = network.extract_feature(images, switch, args.batch_size, verbose=True)
else:
embeddings = network.extract_feature(images, args.batch_size, verbose=True)
# Output the extracted features
np.save(args.output, embeddings)
Example 9: wakati
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def wakati(self, text, lower=False):
"""Word segmentation function. Return the segmented words.
args:
- text (str): An input sentence.
- lower (bool): If True, all uppercase characters in the input \
text are converted to lowercase.
return:
- words (list): A list of the words.
"""
text = utils.preprocess(text)
lower_text = text.lower()
feats = utils.feature_extraction(text=lower_text,
uni2id=self._uni2id,
bi2id=self._bi2id,
dictionary=self._word2id,
window_size=self._hp['WINDOW_SIZE'])
obs = self._model.encode_ws(feats)
obs = [ob.npvalue() for ob in obs]
tags = utils.np_viterbi(self._model.trans_array, obs)
# Words in single_word_list are forced to be segmented as single words.
if self.pattern:
for match in self.pattern.finditer(text):
span = match.span()
span_s = span[0]
span_e = span[1]
if (span_e - span_s) == 1:
tags[span_s:span_e] = [3]
else:
tags[span_s:span_e] = [0]+[1]*((span_e-span_s)-2)+[2]
if span_s != 0:
previous_tag = tags[span_s-1]
if previous_tag == 0: # 0 is BEGIN tag
tags[span_s-1] = 3 # 3 is SINGLE tag
elif previous_tag == 1: # 1 is MIDDLE tag
tags[span_s-1] = 2 # 2 is END tag
if span_e != len(text):
next_tag = tags[span_e]
if next_tag == 1: # 1 is MIDDLE tag
tags[span_e] = 0 # 0 is BEGIN tag
elif next_tag == 2: # 2 is END tag
tags[span_e] = 3 # 3 is SINGLE tag
if lower is True:
words = utils.segmenter_for_bmes(lower_text, tags)
else:
words = utils.segmenter_for_bmes(text, tags)
return words
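A hedged usage sketch for wakati, assuming the nagisa package that the data files in Example 3 point to; the input sentence is illustrative.
import nagisa  # assumed package providing the Tagger shown in Example 3

tagger = nagisa.Tagger()
words = tagger.wakati('Pythonで簡単に使えるツールです')
print(words)  # e.g. ['Python', 'で', '簡単', 'に', '使える', 'ツール', 'です']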
Example 10: classify
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def classify(document):
"""
Classify a document with the Hierarchical Attention Network (HAN).
:param document: a document in text form
:return: pre-processed tokenized document, class scores, attention weights for words, attention weights for sentences, sentence lengths
"""
# A list to store the document tokenized into words
doc = list()
# Tokenize document into sentences
sentences = list()
for paragraph in preprocess(document).splitlines():
sentences.extend(sent_tokenizer.tokenize(paragraph))
# Tokenize sentences into words
for s in sentences[:sentence_limit]:
w = word_tokenizer.tokenize(s)[:word_limit]
if len(w) == 0:
continue
doc.append(w)
# Number of sentences in the document
sentences_in_doc = len(doc)
sentences_in_doc = torch.LongTensor([sentences_in_doc]).to(device) # (1)
# Number of words in each sentence
words_in_each_sentence = list(map(lambda s: len(s), doc))
words_in_each_sentence = torch.LongTensor(words_in_each_sentence).unsqueeze(0).to(device) # (1, n_sentences)
# Encode document with indices from the word map
encoded_doc = list(
map(lambda s: list(map(lambda w: word_map.get(w, word_map['<unk>']), s)) + [0] * (word_limit - len(s)),
doc)) + [[0] * word_limit] * (sentence_limit - len(doc))
encoded_doc = torch.LongTensor(encoded_doc).unsqueeze(0).to(device)
# Apply the HAN model
scores, word_alphas, sentence_alphas = model(encoded_doc, sentences_in_doc,
words_in_each_sentence) # (1, n_classes), (1, n_sentences, max_sent_len_in_document), (1, n_sentences)
scores = scores.squeeze(0) # (n_classes)
scores = nn.functional.softmax(scores, dim=0) # (n_classes)
word_alphas = word_alphas.squeeze(0) # (n_sentences, max_sent_len_in_document)
sentence_alphas = sentence_alphas.squeeze(0) # (n_sentences)
words_in_each_sentence = words_in_each_sentence.squeeze(0) # (n_sentences)
return doc, scores, word_alphas, sentence_alphas, words_in_each_sentence
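A hedged usage sketch for classify; the input text is illustrative, and the model, tokenizers and word_map are assumed to have been loaded as in the surrounding script.
doc, scores, word_alphas, sentence_alphas, words_per_sentence = classify("The food was great and the staff were friendly.")
predicted_class = scores.argmax(dim=0).item()
print('Predicted class index:', predicted_class)
print('Class scores:', scores.tolist())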
Example 11: read_data_from_file
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def read_data_from_file(data_path):
maybe_download()
with open(data_path) as f:
text = f.read()
###########################################################
# ------------------- Preprocessing -----------------------
# 1. Replace punctuation with tokens, e.g. period -> <PERIOD>
# 2. Remove words that show up five times or fewer
words = utils.preprocess(text)
# Hmm, let's take a look at the processed data
print('First 30 words:', words[:30])
print('Total words:', len(words))
print('Total unique words:', len(set(words)))
# Create two dictionaries to convert words to integers
vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
n_vocab = len(int_to_vocab)
# Convert words into integers
int_words = [vocab_to_int[w] for w in words]
###########################################################
# ------------------- Subsampling -------------------------
# Some words like "the", "a", "of" etc don't provide much
# information. So we might want to remove some of them.
# This results in faster training and better representations.
# The probability that a word is discarded is
# P(w) = 1 - sqrt(threshold / frequency(w))
each_word_count = Counter(int_words)
total_count = len(int_words)
threshold = 1e-5 # FLAGS.drop_word_threshold
freqs = {word: count/total_count for word,
count in each_word_count.items()}
probs = {word: 1 - np.sqrt(threshold/freqs[word])
for word in each_word_count}
train_words = [word for word in int_words if random.random() <
(1 - probs[word])]
print('After subsampling, first 30 words:', train_words[:30])
print('After subsampling, total words:', len(train_words))
# Subsampling removed too much contextual information and hurt results here,
# so the unsubsampled int_words are returned instead of train_words.
# return train_words, int_to_vocab, vocab_to_int, n_vocab
return int_words, int_to_vocab, vocab_to_int, n_vocab
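A quick check of the subsampling formula above, with illustrative numbers: at threshold = 1e-5, a very frequent word with frequency 0.01 (on the order of "the") is dropped with probability 1 - sqrt(1e-5 / 0.01) ≈ 0.97, while a word whose frequency equals the threshold is never dropped (1 - sqrt(1) = 0).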
Example 12: read_data_from_file
# Required imports: import utils [as alias]
# Or alternatively: from utils import preprocess [as alias]
def read_data_from_file(data_path):
maybe_download()
with open(data_path) as f:
text = f.read()
###########################################################
# ------------------- Preprocessing -----------------------
# 1. Replace punctuation with tokens, e.g. period -> <PERIOD>
# 2. Remove words that show up five times or fewer
words = utils.preprocess(text)
# Hmm, let's take a look at the processed data
print('First 30 words:', words[:30])
print('Total words:', len(words))
print('Total unique words:', len(set(words)))
# Create two dictionaries to convert words to integers
vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
n_vocab = len(int_to_vocab)
# Convert words into integers
int_words = [vocab_to_int[w] for w in words]
###########################################################
# ------------------- Subsampling -------------------------
# Some words like "the", "a", "of" etc don't provide much
# information. So we might want to remove some of them.
# This results in faster training and better representations.
# The probability that a word is discarded is
# P(w) = 1 - sqrt(threshold / frequency(w))
each_word_count = Counter(int_words)
total_count = len(int_words)
threshold = FLAGS.drop_word_threshold
freqs = {word: count/total_count for word,
count in each_word_count.items()}
probs = {word: 1 - np.sqrt(threshold/freqs[word])
for word in each_word_count}
train_words = [word for word in int_words if random.random() <
(1 - probs[word])]
print('After subsampling, first 30 words:', train_words[:30])
print('After subsampling, total words:', len(train_words))
return train_words, int_to_vocab, vocab_to_int, n_vocab