本文整理汇总了C++中THNN_函数的典型用法代码示例。如果您正苦于以下问题:C++ THNN_函数的具体用法?C++ THNN_怎么用?C++ THNN_使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了THNN_函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: THNN_
// Accumulates the weight and bias gradients for the MM-based volumetric
// (3-D) convolution.
//
// state              - library state (unused here)
// input              - 4-D (non-batch) or 5-D (batch) input tensor
// gradOutput         - gradient w.r.t. the convolution output
// gradWeight         - accumulated weight gradient; may be NULL, in which
//                      case only gradBias is updated
// gradBias           - accumulated bias gradient
// finput             - unfolded input columns saved by the forward pass
// fgradInput         - scratch tensor (unused in this function)
// kT,kW,kH / dT,dW,dH / pT,pW,pH - kernel, stride, padding
// scale_             - multiplier applied to the accumulated gradients
void THNN_(VolumetricConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          THTensor *fgradInput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          accreal scale_)
{
  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);

  THNN_(VolumetricConvolutionMM_shapeCheck)(
        state, input, gradOutput, gradWeight, gradBias,
        kT, kW, kH, dT, dW, dH, pT, pW, pH, 1);

  input = THTensor_(newContiguous)(input);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  if (gradWeight) {
    // Flatten gradWeight to a 2-D view for the per-frame GEMM.
    gradWeight = THNN_(newViewWeight)(gradWeight);
  }

  if (input->nDimension == 4) // non-batch mode
  {
    THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else // batch mode
  {
    int64_t T = input->size[0];
    int64_t t;

    // NOTE: this loop must stay serial. Every iteration accumulates into
    // the SHARED gradWeight/gradBias tensors, so the former
    // `#pragma omp parallel for if(T > CONV3D_OMP_THRESHOLD)` here was a
    // data race (concurrent read-modify-write of the same buffers).
    for (t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = NULL;
      // finput is only consumed by the frame helper when gradWeight is
      // requested, so skip the select otherwise.
      if (gradWeight) {
        finput_t = THTensor_(newSelect)(finput, 0, t);
      }

      THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      if (gradWeight) {
        THTensor_(free)(finput_t);
      }
    }
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
  if (gradWeight) {
    THTensor_(free)(gradWeight);
  }
}
示例2: THNN_
// Accumulates weight/bias gradients for the temporal row convolution.
// When the input is not feature-first, both input and gradOutput are
// transposed (last two dims) before the per-frame accumulation so the
// unfold buffers line up with the forward pass.
void THNN_(TemporalRowConvolution_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *finput,
    THTensor *fgradInput,
    int kW,
    int dW,
    int padW,
    bool featFirst,
    accreal scale_) {

  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
  int ndim = input->nDimension;

  // Transposed views only exist (and are freed) on the !featFirst path.
  THTensor *transposedInput = NULL;
  THTensor *transposedGradOutput = NULL;

  if (featFirst) {
    input = THTensor_(newContiguous)(input);
    gradOutput = THTensor_(newContiguous)(gradOutput);
  } else {
    // Bring the feature dimension forward, then materialize contiguously.
    transposedInput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    transposedGradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
    input = THTensor_(newContiguous)(transposedInput);
    gradOutput = THTensor_(newContiguous)(transposedGradOutput);
  }

  THNN_(TemporalRowConvolution_shapeCheck)
  (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);

  if (ndim == 2) {
    // Single sequence: accumulate directly.
    THNN_(TemporalRowConvolution_accGradParameters_frame)(
        gradOutput, gradWeight, gradBias, finput, scale);
  } else {
    // Batched sequences: accumulate one frame at a time.
    int64_t nFrames = input->size[0];
    for (int64_t frame = 0; frame < nFrames; frame++) {
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, frame);
      THTensor *finputFrame = THTensor_(newSelect)(finput, 0, frame);

      THNN_(TemporalRowConvolution_accGradParameters_frame)(
          gradOutputFrame, gradWeight, gradBias, finputFrame, scale);

      THTensor_(free)(gradOutputFrame);
      THTensor_(free)(finputFrame);
    }
  }

  if (!featFirst) {
    THTensor_(free)(transposedInput);
    THTensor_(free)(transposedGradOutput);
  }
  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
}
示例3: THNN_
// Col2Im ("fold"): reassembles an image batch from its sliding-block
// column representation, accumulating contributions of overlapping
// blocks into `output`.
//
// input dim 1 is split as nOutputPlane * (kW * kH) below; a 2-D input is
// treated as a single un-batched image. output is resized to
// (batchSize, nOutputPlane, outputHeight, outputWidth), then squeezed
// back to 3-D for un-batched input.
void THNN_(Col2Im_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
int64_t outputHeight, int64_t outputWidth,
int64_t kH, int64_t kW,
int64_t dilationH, int64_t dilationW,
int64_t padH, int64_t padW,
int64_t dH, int64_t dW) {
THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
kH, kW, dilationH, dilationW, padH, padW, dH, dW);
bool batched_input = true;
if (input->dim() == 2) {
// Force batch
// NOTE(review): this resizes the caller's tensor in place and never
// restores it, so a 2-D input stays 3-D after the call — confirm
// callers tolerate this.
batched_input = false;
THTensor_(resize3d)(input, 1, input->size(0), input->size(1));
}
long batchSize = input->size(0);
long nInputPlane = input->size(1);
// Each output plane is represented by kW*kH rows of columns.
long nOutputPlane = nInputPlane / (kW * kH);
input = THTensor_(newContiguous)(input);
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
// Overlapping blocks are summed, so the accumulator must start at zero.
THTensor_(zero)(output);
THTensor *input_n = THTensor_(new)();
THTensor *output_n = THTensor_(new)();
// Number of block positions per spatial axis (standard conv output size
// formula with dilation).
int64_t height_col = (outputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
int64_t width_col = (outputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
// Fold one batch element at a time; input_n/output_n are reused views.
for (int64_t elt = 0; elt < batchSize; elt++) {
THTensor_(select)(input_n, input, 0, elt);
THTensor_(select)(output_n, output, 0, elt);
THNN_(col2im)(
input_n->data<scalar_t>(),
nOutputPlane,
outputHeight, outputWidth,
height_col, width_col,
kH, kW,
padH, padW,
dH, dW,
dilationH, dilationW, output_n->data<scalar_t>());
}
c10::raw::intrusive_ptr::decref(input_n);
c10::raw::intrusive_ptr::decref(output_n);
if (!batched_input) {
// Drop the forced batch dimension again for un-batched input.
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
}
// Release the contiguous copy (or extra reference) taken above.
c10::raw::intrusive_ptr::decref(input);
}
示例4: THNN_
// Renormalizes the rows of `weight` addressed by `idx` via
// LookupTable_renormRow — presumably clamping each row's normType-norm
// to maxNorm (confirm in renormRow). Used by LookupTable/Embedding
// with max_norm.
//
// WARNING: `idx` is sorted and de-duplicated IN PLACE below, so the
// caller's index tensor is mutated. Indices are 1-based.
void THNN_(LookupTable_renorm)(
THNNState *state,
THIndexTensor *idx,
THTensor *weight,
real maxNorm,
real normType)
{
if (!THTensor_(isContiguous)(weight))
THError("weight must be contiguous");
if (!THIndexTensor_(isContiguous)(idx))
THError("input must be contiguous");
if (THIndexTensor_(nDimension)(idx) != 1)
THError("idx must be a vector");
if (normType <= 0)
THError("non-positive-norm not supported");
long i;
THIndex_t *row_idx = THIndexTensor_(data)(idx);
long numel = THIndexTensor_(nElement)(idx);
long numw = THTensor_(size)(weight, 0);
long stride = THTensor_(stride)(weight, 0);
real *gw = THTensor_(data)(weight);
// Validate all 1-based indices before touching any rows.
for (i=0; i<numel; i++)
if (row_idx[i] < 1 || row_idx[i] > numw)
THError("input out of range");
// get unique indices
qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
// Compact consecutive duplicates; `ptr` becomes the deduplicated length.
long ptr = 0;
for (i=0; i<numel; i++)
if (i == 0 || row_idx[i] != row_idx[i-1])
row_idx[ptr++] = row_idx[i];
numel = ptr;
#ifdef _OPENMP
if (numel > 1000)
{
// The strategy is to parallelize over the rows that appear in
// row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
// This distributes the work evenly to each thread.
// (Race-free: after deduplication each row is touched by exactly one
// iteration, hence one thread.)
#pragma omp parallel for private(i)
for (i=0; i<numel; i++)
{
long k = row_idx[i] - 1;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
return;
}
#endif
// Serial path: no OpenMP, or too few rows to be worth the fork/join.
for (i=0; i<numel; i++)
{
long k = row_idx[i] - 1;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
}
示例5: THNN_
// Computes gradInput for the MM-based 2-D convolution: each frame's
// gradOutput is multiplied by the transposed weight into the fgradInput
// column buffer, which is then folded back into gradInput.
void THNN_(SpatialConvolutionMM_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *finput,
    THTensor *fgradInput,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  // Depending on the BLAS library, a zero-alpha GEMM may leave the
  // result buffer untouched, which can produce weird values; clear the
  // buffer explicitly to be safe.
  THTensor_(zero)(fgradInput);

  // The frame helper expects the transposed weight; the transpose is
  // undone before returning.
  THTensor_(transpose)(weight, weight, 0, 1);

  if (input->nDimension == 3) {
    // Single image.
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  } else {
    // Batch of images: frames are independent, so run them in parallel.
    long nFrames = input->size[0];
    long frame;
#pragma omp parallel for private(frame)
    for (frame = 0; frame < nFrames; frame++) {
      THTensor *gradInputFrame = THTensor_(newSelect)(gradInput, 0, frame);
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, frame);
      THTensor *fgradInputFrame = THTensor_(newSelect)(fgradInput, 0, frame);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInputFrame, gradOutputFrame, weight, fgradInputFrame, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInputFrame);
      THTensor_(free)(gradOutputFrame);
      THTensor_(free)(fgradInputFrame);
    }
  }

  THTensor_(transpose)(weight, weight, 0, 1);
}
示例6: THNN_
// Legacy sparse linear forward. Input format: batchSize x nnz x 2,
// where input[h][i][0] is a 1-based feature index and input[h][i][1]
// its value (see the get3d calls below). Computes
//   output = input * weight^T + bias.
void THNN_(SparseLinear_legacyUpdateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
THTensor *weight,
THTensor *bias)
{
int64_t h, i;
int64_t outDim = THTensor_(size)(weight, 0);
int64_t inDim = THTensor_(size)(weight, 1);
THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
weight = THTensor_(newContiguous)(weight);
int64_t batchSize = THTensor_(size)(input, 0);
int64_t nnz = THTensor_(size)(input, 1);
THTensor_(resize2d)(output, batchSize, outDim);
// output = weight * input + bias
THTensor_(zero)(output);
// Parallel over batch rows: each thread writes only its own output row
// h, so the accumulation below is race-free.
#pragma omp parallel for private(h, i) schedule(static) if ( \
batchSize > 1 && batchSize * nnz * outDim > 10000)
for (h = 0; h < batchSize; h++) {
for (i = 0; i < nnz; i++) {
real val = THNN_(get3d)(input, h, i, 1);
// Zero entries contribute nothing — presumably padding slots.
if (val == 0) {
continue;
}
// Convert the stored 1-based index to a 0-based column offset.
int64_t offset = (int64_t)(THNN_(get3d)(input, h, i, 0)) - 1;
if (offset >= 0 && offset < inDim) {
// output[h] += val * weight[:, offset]
THBlas_(axpy)(outDim,
val,
COL_PTR2(weight, offset), weight->stride[0],
ROW_PTR2(output, h), output->stride[1]);
} else {
// NOTE(review): %d with int64_t varargs is a printf-format mismatch
// on LP64/LLP64 targets — consider casting or %lld; verify THError's
// format handling.
THError("index out of bound. updateOutput: %d not between 1 and %d",
offset + 1, inDim);
}
}
}
// Add the bias to every output row (output_row is a reused view).
THTensor* output_row = THTensor_(new)();
for (h = 0; h < batchSize; h++) {
THTensor_(select)(output_row, output, 0, h);
THTensor_(cadd)(output_row, bias, 1.0, output_row);
}
THTensor_(free)(output_row);
// Release the contiguous copy (or extra reference) of weight.
THTensor_(free)(weight);
}
示例7: THNN_
// Accumulates weight and bias gradients for the MM-based 2-D
// convolution; gradWeight may arrive 2-D (already flat) or 4-D (a
// temporary flat view is created and freed).
void THNN_(SpatialConvolutionMM_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *finput,
    THTensor *fgradInput,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH,
    real scale)
{
  int viewAllocated = 0;
  long nOutputPlane = gradWeight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 10, "stride should be greater than zero");
  THArgCheck(gradWeight->nDimension == 2 || gradWeight->nDimension == 4, 4, "gradWeight tensor should be 2D or 4D");

  if (gradWeight->nDimension == 4) {
    // Collapse (out, in, kH, kW) into a storage-sharing 2-D view.
    long viewRows = gradWeight->size[0];
    long viewCols = gradWeight->size[1] * gradWeight->size[2] * gradWeight->size[3];
    gradWeight = THTensor_(newWithStorage2d)(gradWeight->storage, gradWeight->storageOffset, viewRows, -1, viewCols, -1);
    viewAllocated = 1;
  }

  if (input->nDimension == 3) {
    // Single image.
    THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  } else {
    // Batch: accumulate serially, one frame at a time.
    long nFrames = input->size[0];
    long frame;
    for (frame = 0; frame < nFrames; frame++) {
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, frame);
      THTensor *finputFrame = THTensor_(newSelect)(finput, 0, frame);

      THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutputFrame, gradWeight, gradBias, finputFrame, scale);

      THTensor_(free)(gradOutputFrame);
      THTensor_(free)(finputFrame);
    }
  }

  if (viewAllocated)
    THTensor_(free)(gradWeight);
}
示例8: THNN_
// Computes gradInput for the MM-based 2-D convolution: per frame,
// fgradInput = weight^T * gradOutput is computed and then folded back
// into gradInput by the frame helper.
//
// state        - library state (unused here)
// input        - 3-D (single image) or 4-D (batch) input
// gradOutput   - gradient w.r.t. the convolution output
// gradInput    - resized to input's shape and filled here
// weight       - convolution weight (transposed in place, then restored)
// finput       - unfold buffer from the forward pass (shape reference)
// fgradInput   - column scratch buffer, resized like finput
// kW,kH / dW,dH / padW,padH - kernel, stride, padding
void THNN_(SpatialConvolutionMM_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *weight,
          THTensor *finput,
          THTensor *fgradInput,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH)
{
  long nOutputPlane = weight->size[0];

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 3, "Number of output features is not equal to nOutputPlane" );
  THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero");
  THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  // FIX: depending on the BLAS library, fgradInput (result tensor) might
  // be left uninitialized on zero alpha, which might lead to weird
  // behavior; hence, to be safe, zero it. (The other copy of this
  // function in this file already does this.)
  THTensor_(zero)(fgradInput);

  // The frame helper expects the transposed weight; restored below.
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {
    THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
  }
  else
  {
    long T = input->size[0];
    long t;

    // Frames are independent (each iteration touches only its own
    // selections), so the batch loop can run in parallel.
#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  THTensor_(transpose)(weight, weight, 0, 1);
}
示例9: THNN_
// Accumulates weight and bias gradients for the MM-based volumetric
// convolution (legacy variant taking `real scale` directly).
//
// state       - library state (unused here)
// input       - 4-D (non-batch) or 5-D (batch) input tensor
// gradOutput  - gradient w.r.t. the convolution output
// gradWeight  - accumulated weight gradient
// gradBias    - accumulated bias gradient
// finput      - unfolded input columns saved by the forward pass
// kT,kW,kH / dT,dW,dH / pT,pW,pH - kernel, stride, padding
// scale       - multiplier applied to the accumulated gradients
void THNN_(VolumetricConvolutionMM_accGradParameters)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THTensor *gradBias,
          THTensor *finput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,
          int pT, int pW, int pH,
          real scale)
{
  // (The former local `nOutputPlane` was computed here but never used —
  // removed.)
  THNN_(VolumetricConvolutionMM_shapeCheck)(
        state, input, gradOutput, gradWeight, gradBias,
        kT, kW, kH, dT, dW, dH, pT, pW, pH);

  input = THTensor_(newContiguous)(input);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // view_weight presumably flattens a multi-dim gradWeight to 2-D; its
  // return value reports whether a new tensor was allocated and must be
  // freed below.
  int freeWeight = THNN_(view_weight)(&gradWeight);

  if (input->nDimension == 4) // non-batch mode
  {
    THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }
  else // batch mode: serial — each iteration accumulates into the
       // shared gradWeight/gradBias
  {
    long T = input->size[0];
    long t;

    for (t = 0; t < T; t++)
    {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
  if (freeWeight)
    THTensor_(free)(gradWeight);
}
示例10: THNN_
// Accumulates gradWeight (and gradBias when bias is present) for the
// Linear module. Supports 1-D input (single sample) and 2-D input
// (batch); any other rank is a silent no-op. gradInput is unused here.
void THNN_(Linear_accGradParameters)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    THTensor *bias,
    THTensor *gradWeight,
    THTensor *gradBias,
    THTensor *addBuffer,
    real scale)
{
  long dim = THTensor_(nDimension)(input);

  if (dim == 1) {
    // Single sample: rank-1 update gradWeight += scale * gradOutput x input.
    THTensor_(addr)(gradWeight, 1, gradWeight, scale, gradOutput, input);
    if (bias) {
      THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);
    }
    return;
  }

  if (dim != 2) {
    return; // unsupported rank: nothing to accumulate
  }

  // Batch: gradWeight += scale * gradOutput^T * input. The transpose is
  // done in place and undone before returning.
  THTensor_(transpose)(gradOutput, gradOutput, 0, 1);
  THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput, input);
  if (bias) {
    // addBuffer reduces gradOutput over the batch for the bias gradient.
    THNN_(Linear_updateAddBuffer)(state, input, addBuffer);
    THTensor_(addmv)(gradBias, 1, gradBias, scale, gradOutput, addBuffer);
  }
  THTensor_(transpose)(gradOutput, gradOutput, 0, 1);
}
示例11: THNN_
// Single-frame gradInput computation for SpatialConvolutionLocal (a
// locally-connected layer: an independent weight per output location).
//
// gradOutput and fgradInput are reinterpreted as 3-D storage-sharing
// views whose leading "batch" dimension is the oH*oW output locations,
// so a single baddbmm applies each location's weight slice to that
// location's gradOutput column.
static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)
(THTensor *gradInput, THTensor *gradOutput,
THTensor *weight, THTensor *fgradInput,
int kW, int kH, int dW, int dH, int padW, int padH,
long nInputPlane, long inputWidth, long inputHeight,
long nOutputPlane, long outputWidth, long outputHeight)
{
THTensor *gradOutput3d, *fgradInput3d;
// View over gradOutput's storage: (oH*oW) x nOutputPlane x 1.
gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
outputHeight*outputWidth, 1,
nOutputPlane, outputHeight*outputWidth,
1, nOutputPlane*outputHeight*outputWidth);
// View over fgradInput's storage: (oH*oW) x (kW*kH*nInputPlane) x 1.
fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
outputHeight*outputWidth, 1,
kW*kH*nInputPlane, outputHeight*outputWidth,
1, kW*kH*nInputPlane*outputHeight*outputWidth);
// weight: oH*oW x nInputPlane*kH*kW x nOutputPlane
// gradOutput3d: oH*oW x nOutputPlane x 1
// beta = 0.0: fgradInput3d is overwritten, not accumulated into.
THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
// fgradInput3d: oH*oW x nInputPlane*kH*kW x 1
// The 3-D views only borrow the underlying storage; freeing them leaves
// fgradInput/gradOutput intact.
THTensor_(free)(gradOutput3d);
THTensor_(free)(fgradInput3d);
// Fold the per-location columns back into the (zeroed) input gradient;
// overlapping windows are accumulated by unfolded_acc.
THTensor_(zero)(gradInput);
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
nInputPlane, inputWidth, inputHeight,
outputWidth, outputHeight);
}
示例12: THNN_
// Linear forward: output = input * weight^T + bias.
// Supports 1-D input (single sample, via addmv) and 2-D input (batch,
// via addmm plus a rank-1 bias update); other ranks are a no-op.
//
// addBuffer is maintained by Linear_updateAddBuffer — presumably a
// batch-length ones vector so the addr below broadcasts bias to every
// row (confirm in updateAddBuffer).
void THNN_(Linear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias,
          THTensor *addBuffer)
{
  int64_t dim = THTensor_(_nDimension)(input);
  if (dim == 1) {
    THTensor_(resize1d)(output,THTensor_(size)(weight,0));
    // Seed output with the bias (or zeros), then add weight * input.
    if (bias) {
      THTensor_(copy)(output,bias);
    }
    else {
      THTensor_(zero)(output);
    }
    THTensor_(addmv)(output,1,output,1,weight,input);
  }
  else if (dim == 2) {
    int64_t nframe = THTensor_(size)(input,0);
    int64_t nElement = THTensor_(nElement)(output);
    THTensor_(resize2d)(output,nframe,THTensor_(size)(weight,0));
    // Only clear when the resize changed the element count; the addmm
    // below overwrites the contents anyway (beta = 0).
    if (THTensor_(nElement)(output) != nElement) {
      THTensor_(zero)(output);
    }
    THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
    // output = input * weight^T (via a transposed storage-sharing view).
    THTensor *tweight = THTensor_(new)();
    THTensor_(transpose)(tweight,weight,0,1);
    THTensor_(addmm)(output,0,output,1,input,tweight);
    THTensor_(free)(tweight);
    if (bias) {
      THTensor_(addr)(output,1,output,1,addBuffer,bias);
    }
  }
} // FIX: the function's closing brace was missing in the pasted source.
示例13: THNN_
// Single-frame gradInput computation for VolumetricConvolutionMM:
// fgradInput = weight * gradOutput2d (weight is presumably passed
// pre-transposed by the caller, as in the spatial variants — confirm),
// then the unfolded columns are accumulated back into gradInput.
static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
THTensor *gradInput,
THTensor *gradOutput,
THTensor *weight,
THTensor *fgradInput,
int kT,
int kW,
int kH,
int dT,
int dW,
int dH,
int pT,
int pW,
int pH)
{
// Flatten gradOutput to 2-D (nOutputPlane, oT*oH*oW), sharing storage.
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
);
// beta = 0: fgradInput is overwritten, not accumulated into.
THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
// The view only borrows storage; freeing it leaves gradOutput intact.
THTensor_(free)(gradOutput2d);
// Overlapping columns are summed into gradInput, so clear it first.
THTensor_(zero)(gradInput);
// Note the size argument order: width (size[3]) is passed before
// height (size[2]), matching the (width, height) parameter order used
// by unfolded_acc elsewhere in this file.
THNN_(unfolded_acc_vol)(
fgradInput, gradInput,
kT, kW, kH,
dT, dW, dH,
pT, pW, pH,
gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2],
gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
);
}
示例14: THNN_
// Single-frame gradInput computation for SpatialConvolutionMM:
// fgradInput = weight * flatGradOutput (weight arrives pre-transposed —
// see the callers, which transpose it in place), then the unfolded
// columns are accumulated back into gradInput.
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
    THTensor *gradInput,
    THTensor *gradOutput,
    THTensor *weight,
    THTensor *fgradInput,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH)
{
  // Overlapping columns are summed into gradInput, so clear it up front.
  THTensor_(zero)(gradInput);

  // Flatten gradOutput to 2-D (nOutputPlane, oH*oW) without copying.
  THTensor *flatGradOutput = THTensor_(newWithStorage2d)
      (gradOutput->storage, gradOutput->storageOffset,
       gradOutput->size[0], -1,
       gradOutput->size[1]*gradOutput->size[2], -1);

  // beta = 0: fgradInput is overwritten, not accumulated into.
  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, flatGradOutput);
  THTensor_(free)(flatGradOutput);

  // Fold the columns back into the input gradient.
  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH,
                      padW, padH,
                      gradInput->size[0], gradInput->size[2], gradInput->size[1],
                      gradOutput->size[2], gradOutput->size[1]);
}
示例15: THNN_
// Sparse linear forward for COO-format input: nnz x 3 rows of
// [sample, featureIndex, value] with 1-based indices (see the get2d
// calls below). Computes output = input * weight^T + bias.
//
// NOTE(review): batchSize is read from `output`, so the caller must
// pre-size output before calling — confirm against call sites.
void THNN_(SparseLinear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THTensor *weight,
          THTensor *bias)
{
  long h, i;
  long outDim = THTensor_(size)(weight, 0);
  long inDim = THTensor_(size)(weight, 1);
  long batchSize = THTensor_(size)(output, 0);

  THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
  THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
  THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");

  long nnz = THTensor_(size)(input, 0);

  // output = weight * input + bias
  THTensor_(zero)(output);

  // NOTE(review): entries sharing the same sample row may land on
  // different threads, making the axpy into that row a potential data
  // race — confirm whether duplicate sample rows can occur here.
#pragma omp parallel for private(i) schedule(static) if (nnz * outDim > 10000)
  for (i = 0; i < nnz; i++) {
    real val = THNN_(get2d)(input, i, 2);
    // Zero entries contribute nothing — presumably padding slots.
    if (val == 0) {
      continue;
    }

    // 1-based stored indices -> 0-based column/row offsets.
    long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
    // FIX: renamed from `h`, which shadowed the outer `h` used by the
    // bias loop below.
    long sample = (long)(THNN_(get2d)(input, i, 0)) - 1;

    if (offset >= 0 && offset < inDim) {
      // output[sample] += val * weight[:, offset]
      THBlas_(axpy)(outDim,
                    val,
                    COL_PTR2(weight, offset), weight->stride[0],
                    ROW_PTR2(output, sample), output->stride[1]);
    } else {
      // FIX: cast the long arguments to int so they match the %d
      // conversions (passing long through %d varargs is UB on LP64).
      THError("index out of bound. updateOutput: %d not between 1 and %d",
              (int)(offset + 1), (int)inDim);
    }
  }

  // Add the bias to every output row (output_row is a reused view).
  THTensor* output_row = THTensor_(new)();
  for (h = 0; h < batchSize; h++) {
    THTensor_(select)(output_row, output, 0, h);
    THTensor_(cadd)(output_row, bias, 1.0, output_row);
  }
  THTensor_(free)(output_row);
}