This page collects typical usage examples of the Python attribute vggish_params.EMBEDDING_SIZE: what vggish_params.EMBEDDING_SIZE does, how to use it, and concrete code that exercises it. You can also explore other usage examples from the module vggish_params, where this attribute is defined.
The following shows 4 code examples of the vggish_params.EMBEDDING_SIZE attribute, sorted by popularity by default.
Example 1: __init__
# Required module: import vggish_params [as alias]
# Or: from vggish_params import EMBEDDING_SIZE [as alias]
def __init__(self, pca_params_npz_path):
  """Constructs a postprocessor.

  Args:
    pca_params_npz_path: Path to a NumPy-format .npz file that
      contains the PCA parameters used in postprocessing.
  """
  params = np.load(pca_params_npz_path)
  self._pca_matrix = params[vggish_params.PCA_EIGEN_VECTORS_NAME]
  # Load means into a column vector for easier broadcasting later.
  self._pca_means = params[vggish_params.PCA_MEANS_NAME].reshape(-1, 1)
  assert self._pca_matrix.shape == (
      vggish_params.EMBEDDING_SIZE, vggish_params.EMBEDDING_SIZE), (
          'Bad PCA matrix shape: %r' % (self._pca_matrix.shape,))
  assert self._pca_means.shape == (vggish_params.EMBEDDING_SIZE, 1), (
      'Bad PCA means shape: %r' % (self._pca_means.shape,))
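A minimal usage sketch for this constructor, assuming the released vggish_pca_params.npz file sits in the working directory (the same assumption Example 4 below makes):

import vggish_params
import vggish_postprocess

pproc = vggish_postprocess.Postprocessor('vggish_pca_params.npz')
# The asserts in __init__ guarantee a square PCA matrix of shape
# [EMBEDDING_SIZE, EMBEDDING_SIZE] and a column vector of means.
print(vggish_params.EMBEDDING_SIZE)  # 128 in the released model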
Example 2: postprocess
# Required module: import vggish_params [as alias]
# Or: from vggish_params import EMBEDDING_SIZE [as alias]
def postprocess(self, embeddings_batch):
  """Applies postprocessing to a batch of embeddings.

  Args:
    embeddings_batch: An nparray of shape [batch_size, embedding_size]
      containing output from the embedding layer of VGGish.

  Returns:
    An nparray of the same shape as the input but of type uint8,
    containing the PCA-transformed and quantized version of the input.
  """
  assert len(embeddings_batch.shape) == 2, (
      'Expected 2-d batch, got %r' % (embeddings_batch.shape,))
  assert embeddings_batch.shape[1] == vggish_params.EMBEDDING_SIZE, (
      'Bad batch shape: %r' % (embeddings_batch.shape,))

  # Apply PCA.
  # - Embeddings come in as [batch_size, embedding_size].
  # - Transpose to [embedding_size, batch_size].
  # - Subtract pca_means column vector from each column.
  # - Premultiply by PCA matrix of shape [output_dims, input_dims]
  #   where both are equal to embedding_size in our case.
  # - Transpose result back to [batch_size, embedding_size].
  pca_applied = np.dot(self._pca_matrix,
                       (embeddings_batch.T - self._pca_means)).T

  # Quantize by:
  # - clipping to [min, max] range
  clipped_embeddings = np.clip(
      pca_applied, vggish_params.QUANTIZE_MIN_VAL,
      vggish_params.QUANTIZE_MAX_VAL)
  # - convert to 8-bit in range [0.0, 255.0]
  quantized_embeddings = (
      (clipped_embeddings - vggish_params.QUANTIZE_MIN_VAL) *
      (255.0 /
       (vggish_params.QUANTIZE_MAX_VAL - vggish_params.QUANTIZE_MIN_VAL)))
  # - cast 8-bit float to uint8
  quantized_embeddings = quantized_embeddings.astype(np.uint8)

  return quantized_embeddings
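To see the quantization half of this method in isolation, here is a hedged sketch that runs a random batch through postprocess() and approximately inverts the 8-bit quantization; the PCA rotation is deliberately left alone, the random input is purely illustrative, and the .npz path is assumed to exist as in Example 4:

import numpy as np
import vggish_params
import vggish_postprocess

pproc = vggish_postprocess.Postprocessor('vggish_pca_params.npz')
batch = np.random.rand(4, vggish_params.EMBEDDING_SIZE).astype(np.float32)
quantized = pproc.postprocess(batch)
assert quantized.dtype == np.uint8 and quantized.shape == batch.shape

# Map uint8 values back to the [QUANTIZE_MIN_VAL, QUANTIZE_MAX_VAL] range;
# this undoes the scaling above but not the clipping or the PCA.
scale = (vggish_params.QUANTIZE_MAX_VAL -
         vggish_params.QUANTIZE_MIN_VAL) / 255.0
dequantized = quantized * scale + vggish_params.QUANTIZE_MIN_VAL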
Example 3: define_vggish_slim
# Required module: import vggish_params [as alias]
# Or: from vggish_params import EMBEDDING_SIZE [as alias]
def define_vggish_slim(training=False):
  """Defines the VGGish TensorFlow model.

  All ops are created in the current default graph, under the scope 'vggish/'.

  The input is a placeholder named 'vggish/input_features' of type float32 and
  shape [batch_size, num_frames, num_bands] where batch_size is variable and
  num_frames and num_bands are constants, and [num_frames, num_bands] represents
  a log-mel-scale spectrogram patch covering num_bands frequency bands and
  num_frames time frames (where each frame step is usually 10ms). This is
  produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET).

  The output is an op named 'vggish/embedding' which produces the activations of
  a 128-D embedding layer, which is usually the penultimate layer when used as
  part of a full model with a final classifier layer.

  Args:
    training: If true, all parameters are marked trainable.

  Returns:
    The op 'vggish/embedding'.
  """
  # Defaults:
  # - All weights are initialized to N(0, INIT_STDDEV).
  # - All biases are initialized to 0.
  # - All activations are ReLU.
  # - All convolutions are 3x3 with stride 1 and SAME padding.
  # - All max-pools are 2x2 with stride 2 and SAME padding.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_initializer=tf.truncated_normal_initializer(
                          stddev=params.INIT_STDDEV),
                      biases_initializer=tf.zeros_initializer(),
                      activation_fn=tf.nn.relu,
                      trainable=training), \
       slim.arg_scope([slim.conv2d],
                      kernel_size=[3, 3], stride=1, padding='SAME'), \
       slim.arg_scope([slim.max_pool2d],
                      kernel_size=[2, 2], stride=2, padding='SAME'), \
       tf.variable_scope('vggish'):
    # Input: a batch of 2-D log-mel-spectrogram patches.
    features = tf.placeholder(
        tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
        name='input_features')
    # Reshape to 4-D so that we can convolve a batch with conv2d().
    net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

    # The VGG stack of alternating convolutions and max-pools.
    net = slim.conv2d(net, 64, scope='conv1')
    net = slim.max_pool2d(net, scope='pool1')
    net = slim.conv2d(net, 128, scope='conv2')
    net = slim.max_pool2d(net, scope='pool2')
    net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
    net = slim.max_pool2d(net, scope='pool3')
    net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
    net = slim.max_pool2d(net, scope='pool4')

    # Flatten before entering fully-connected layers.
    net = slim.flatten(net)
    net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
    # The embedding layer.
    net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
    return tf.identity(net, name='embedding')
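Since this function only builds the graph, a typical next step is to restore the released weights and probe the output shape. Below is a small sketch assuming vggish_model.ckpt has been downloaded as in Example 4, using the companion helper load_vggish_slim_checkpoint() and the tensor-name constants that Example 4 also relies on:

import numpy as np
import tensorflow as tf
import vggish_params
import vggish_slim

with tf.Graph().as_default(), tf.Session() as sess:
  vggish_slim.define_vggish_slim(training=False)
  vggish_slim.load_vggish_slim_checkpoint(sess, 'vggish_model.ckpt')
  features = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
  embedding = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
  # A single all-zeros patch, just to check the embedding dimensionality.
  dummy = np.zeros((1, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS),
                   dtype=np.float32)
  out = sess.run(embedding, feed_dict={features: dummy})
  print(out.shape)  # (1, vggish_params.EMBEDDING_SIZE), i.e. (1, 128)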
Example 4: extract_audioset_embedding
# Required module: import vggish_params [as alias]
# Or: from vggish_params import EMBEDDING_SIZE [as alias]
def extract_audioset_embedding():
  """Extracts the postprocessed VGGish embedding of an audio clip."""

  # Arguments & parameters
  mel_bins = vggish_params.NUM_BANDS
  sample_rate = vggish_params.SAMPLE_RATE
  input_len = vggish_params.NUM_FRAMES
  embedding_size = vggish_params.EMBEDDING_SIZE

  # You may modify EXAMPLE_HOP_SECONDS in vggish_params.py to change the
  # hop size.

  # Paths
  audio_path = 'appendixes/01.wav'
  checkpoint_path = 'vggish_model.ckpt'
  pca_params_path = 'vggish_pca_params.npz'

  if not os.path.isfile(checkpoint_path):
    raise Exception('Please download vggish_model.ckpt from '
                    'https://storage.googleapis.com/audioset/vggish_model.ckpt '
                    'and put it in the root of this codebase.')
  if not os.path.isfile(pca_params_path):
    raise Exception('Please download vggish_pca_params.npz from '
                    'https://storage.googleapis.com/audioset/vggish_pca_params.npz '
                    'and put it in the root of this codebase.')

  # Load model
  sess = tf.Session()
  vggish_slim.define_vggish_slim(training=False)
  vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

  features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
  embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

  pproc = vggish_postprocess.Postprocessor(pca_params_path)

  # Read audio
  (audio, _) = read_audio(audio_path, target_fs=sample_rate)

  # Extract log-mel feature
  logmel = vggish_input.waveform_to_examples(audio, sample_rate)

  # Extract embedding feature
  [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: logmel})

  # PCA + quantization
  postprocessed_batch = pproc.postprocess(embedding_batch)

  print('Audio length: {}'.format(len(audio)))
  print('Log mel shape: {}'.format(logmel.shape))
  print('Embedding feature shape: {}'.format(postprocessed_batch.shape))
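For reference, the shapes printed at the end follow from the framing constants in vggish_params.py: each 0.96 s window of audio becomes one [NUM_FRAMES, NUM_BANDS] log-mel patch, and each patch becomes one EMBEDDING_SIZE-dimensional uint8 row of the final batch. A hedged back-of-the-envelope check, assuming the default window and hop of 0.96 s from the released vggish_params.py:

import vggish_params

audio_seconds = 10.0  # hypothetical clip length
num_examples = int(
    1 + (audio_seconds - vggish_params.EXAMPLE_WINDOW_SECONDS)
    // vggish_params.EXAMPLE_HOP_SECONDS)
# With the defaults (window = hop = 0.96 s), a 10 s clip yields 10 rows,
# so the postprocessed batch has shape (10, vggish_params.EMBEDDING_SIZE).
print(num_examples, vggish_params.EMBEDDING_SIZE)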