This article collects typical usage examples of the Python attribute vggish_params.NUM_FRAMES. If you are wondering what vggish_params.NUM_FRAMES does and how to use it, the hand-picked attribute code examples below may help. You can also explore other usages of the module vggish_params that this attribute belongs to.
Two code examples of vggish_params.NUM_FRAMES are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
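As context before the examples: NUM_FRAMES is the number of time frames in one VGGish input patch and NUM_BANDS is the number of mel bands, so every input example has shape [NUM_FRAMES, NUM_BANDS] (each frame step is usually 10 ms, as the docstring in Example 1 notes). A minimal sketch, assuming the standard vggish_params.py from the tensorflow/models AudioSet code is importable:

import numpy as np
import vggish_params

# One VGGish input example: NUM_FRAMES time frames x NUM_BANDS mel bands.
patch_shape = (vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS)
print('Expected log-mel patch shape:', patch_shape)

# A dummy batch of the shape the model placeholder expects:
# [batch_size, NUM_FRAMES, NUM_BANDS].
dummy_batch = np.zeros((4,) + patch_shape, dtype=np.float32)
print('Dummy batch shape:', dummy_batch.shape)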
Example 1: define_vggish_slim
# Required import: import vggish_params [as alias]
# Or: from vggish_params import NUM_FRAMES [as alias]
# Assumed imports (not shown in the original listing): `tf` is TensorFlow in
# 1.x graph mode and `slim` is the tf_slim package (tf.contrib.slim in TF 1.x).
import tensorflow.compat.v1 as tf
import tf_slim as slim
import vggish_params as params

tf.disable_v2_behavior()  # the model uses placeholders and variable scopes


def define_vggish_slim(training=False):
  """Defines the VGGish TensorFlow model.

  All ops are created in the current default graph, under the scope 'vggish/'.

  The input is a placeholder named 'vggish/input_features' of type float32 and
  shape [batch_size, num_frames, num_bands] where batch_size is variable and
  num_frames and num_bands are constants, and [num_frames, num_bands] represents
  a log-mel-scale spectrogram patch covering num_bands frequency bands and
  num_frames time frames (where each frame step is usually 10ms). This is
  produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET).

  The output is an op named 'vggish/embedding' which produces the activations of
  a 128-D embedding layer, which is usually the penultimate layer when used as
  part of a full model with a final classifier layer.

  Args:
    training: If True, all parameters are marked trainable.

  Returns:
    The op 'vggish/embedding'.
  """
  # Defaults:
  # - All weights are initialized to N(0, INIT_STDDEV).
  # - All biases are initialized to 0.
  # - All activations are ReLU.
  # - All convolutions are 3x3 with stride 1 and SAME padding.
  # - All max-pools are 2x2 with stride 2 and SAME padding.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_initializer=tf.truncated_normal_initializer(
                          stddev=params.INIT_STDDEV),
                      biases_initializer=tf.zeros_initializer(),
                      activation_fn=tf.nn.relu,
                      trainable=training), \
       slim.arg_scope([slim.conv2d],
                      kernel_size=[3, 3], stride=1, padding='SAME'), \
       slim.arg_scope([slim.max_pool2d],
                      kernel_size=[2, 2], stride=2, padding='SAME'), \
       tf.variable_scope('vggish'):
    # Input: a batch of 2-D log-mel-spectrogram patches.
    features = tf.placeholder(
        tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
        name='input_features')
    # Reshape to 4-D so that we can convolve a batch with conv2d().
    net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

    # The VGG stack of alternating convolutions and max-pools.
    net = slim.conv2d(net, 64, scope='conv1')
    net = slim.max_pool2d(net, scope='pool1')
    net = slim.conv2d(net, 128, scope='conv2')
    net = slim.max_pool2d(net, scope='pool2')
    net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
    net = slim.max_pool2d(net, scope='pool3')
    net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
    net = slim.max_pool2d(net, scope='pool4')

    # Flatten before entering fully-connected layers.
    net = slim.flatten(net)
    net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
    # The embedding layer.
    net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
    return tf.identity(net, name='embedding')
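Not part of the original listing: a minimal usage sketch for define_vggish_slim, assuming the imports shown above (tensorflow.compat.v1 as tf, vggish_params as params). It builds the graph, initializes random weights, and pushes a dummy batch through purely as a shape check; real embeddings require loading vggish_model.ckpt as in Example 2 below.

import numpy as np

with tf.Graph().as_default(), tf.Session() as sess:
    # Build the VGGish graph and grab the embedding tensor.
    embedding_op = define_vggish_slim(training=False)
    # Random initialization: only useful to verify tensor shapes here.
    sess.run(tf.global_variables_initializer())
    dummy = np.random.rand(2, params.NUM_FRAMES, params.NUM_BANDS).astype(np.float32)
    input_tensor = sess.graph.get_tensor_by_name('vggish/input_features:0')
    [emb] = sess.run([embedding_op], feed_dict={input_tensor: dummy})
    print(emb.shape)  # (2, params.EMBEDDING_SIZE), i.e. (2, 128)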
Example 2: extract_audioset_embedding
# Required import: import vggish_params [as alias]
# Or: from vggish_params import NUM_FRAMES [as alias]
# Assumed imports (not shown in the original listing): the AudioSet VGGish
# modules from the tensorflow/models repository, plus a read_audio helper
# (one possible implementation is sketched after this example).
import os

import tensorflow.compat.v1 as tf
import vggish_input
import vggish_params
import vggish_postprocess
import vggish_slim

tf.disable_v2_behavior()  # TF 1.x-style sessions and placeholders


def extract_audioset_embedding():
    """Extract the 128-D AudioSet (VGGish) embedding of an audio file."""
    # Arguments & parameters
    mel_bins = vggish_params.NUM_BANDS
    sample_rate = vggish_params.SAMPLE_RATE
    input_len = vggish_params.NUM_FRAMES
    embedding_size = vggish_params.EMBEDDING_SIZE

    # You may modify EXAMPLE_HOP_SECONDS in vggish_params.py to change the
    # hop size between consecutive input examples.

    # Paths
    audio_path = 'appendixes/01.wav'
    checkpoint_path = os.path.join('vggish_model.ckpt')
    pca_params_path = os.path.join('vggish_pca_params.npz')

    if not os.path.isfile(checkpoint_path):
        raise Exception('Please download vggish_model.ckpt from '
                        'https://storage.googleapis.com/audioset/vggish_model.ckpt '
                        'and put it in the root of this codebase. ')

    if not os.path.isfile(pca_params_path):
        raise Exception('Please download vggish_pca_params.npz from '
                        'https://storage.googleapis.com/audioset/vggish_pca_params.npz '
                        'and put it in the root of this codebase. ')

    # Load model
    sess = tf.Session()

    vggish_slim.define_vggish_slim(training=False)
    vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

    features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

    pproc = vggish_postprocess.Postprocessor(pca_params_path)

    # Read audio
    (audio, _) = read_audio(audio_path, target_fs=sample_rate)

    # Extract log mel feature
    logmel = vggish_input.waveform_to_examples(audio, sample_rate)

    # Extract embedding feature
    [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: logmel})

    # PCA
    postprocessed_batch = pproc.postprocess(embedding_batch)

    print('Audio length: {}'.format(len(audio)))
    print('Log mel shape: {}'.format(logmel.shape))
    print('Embedding feature shape: {}'.format(postprocessed_batch.shape))
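The read_audio helper called in Example 2 is not defined in the listing above. The sketch below is one possible implementation (an assumption, not the original author's code), using the soundfile and resampy packages to load a mono float waveform and resample it to target_fs:

import numpy as np
import resampy
import soundfile


def read_audio(path, target_fs=None):
    # Hypothetical helper: soundfile.read returns (samples, sample_rate).
    (audio, fs) = soundfile.read(path)
    # Down-mix multi-channel audio to mono.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    # Resample if a target rate is requested and differs from the file's rate.
    if target_fs is not None and fs != target_fs:
        audio = resampy.resample(audio, sr_orig=fs, sr_new=target_fs)
        fs = target_fs
    return audio, fs

With such a helper in place, and vggish_model.ckpt and vggish_pca_params.npz downloaded into the working directory, extract_audioset_embedding() can be called directly; it prints the audio length, the log-mel example shape (num_examples, NUM_FRAMES, NUM_BANDS), and the postprocessed embedding shape (num_examples, EMBEDDING_SIZE).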