本文整理汇总了C++中FLA_Finalize函数的典型用法代码示例。如果您正苦于以下问题：C++ FLA_Finalize函数的具体用法？C++ FLA_Finalize怎么用？C++ FLA_Finalize使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了FLA_Finalize函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
/*
 * Test driver: reads (order, nA, permutation) from the command line,
 * validates them, and runs test_permute_tensor() on the result.
 *
 * When either parse_input() or check_errors() reports FLA_FAILURE the
 * driver calls Usage() instead of running the test. The library is
 * finalized and 0 is returned on every path.
 */
int main(int argc, char* argv[]){
    dim_t order;
    dim_t nA;
    dim_t permutation[FLA_MAX_ORDER];
    int inputs_ok;

    FLA_Init();

    /* check_errors() is only consulted once parsing has succeeded
       (short-circuit evaluation), so it never sees unparsed values. */
    inputs_ok = parse_input(argc, argv, &order, &nA, permutation) != FLA_FAILURE
             && check_errors(order, nA, permutation) != FLA_FAILURE;

    if (inputs_ok) {
        test_permute_tensor(order, nA, permutation);
    } else {
        Usage();
    }

    FLA_Finalize();
    return 0;
}
示例2: mexFunction
/*
 * MEX gateway that forwards its MATLAB arguments to FLA_Axpyt_external().
 *
 * The first NRHS right-hand-side arguments are converted into NINT integer
 * attributes and NOBJ FLAME objects. When exactly one extra argument is
 * supplied (nrhs == NRHS+1) it is treated as a double buffer that receives
 * the difference of two FLA_Clock() readings taken around the call.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    int attr[NINT];
    FLA_Obj obj[NOBJ];
    /* Non-zero when the caller passed the optional timing argument. */
    const int want_timing = (nrhs == NRHS+1);

    FLA_Init();

    /* Check that the number of arguments supplied is correct. */
    FLA_M2C_CheckNumArgs(NRHS, nrhs);

    /* Convert the MATLAB arguments into the matching FLAME C arguments. */
    FLA_M2C_ConvertArgs(NRHS, prhs, NINT, attr, obj);

    if (want_timing) {
        /* Timed path: the extra argument collects timing information. */
        double *dtime = FLA_M2C_ConvertDoublePtr(prhs[NRHS]);

        *dtime = FLA_Clock();
        FLA_Axpyt_external(attr[0], obj[0], obj[1], obj[2]);
        *dtime = FLA_Clock() - *dtime;
    } else {
        /* Untimed path: just call the C FLAME function. */
        FLA_Axpyt_external(attr[0], obj[0], obj[1], obj[2]);
    }

    FLA_Finalize();
}
示例3: phonopy_pinv_libflame
/*
 * Overwrites `matrix` (size x size doubles) with a result assembled from
 * its eigen-decomposition, using libflame.
 *
 * Steps, as visible in the code:
 *   1. `matrix` and `eigvals` are wrapped (not copied) as FLAME objects
 *      A and l; a copy B of A is handed to FLA_Hevd together with l.
 *   2. For every entry of `eigvals`: values below `cutoff` map to 0,
 *      all others to 1 / sqrt(value).
 *   3. B is scaled by that diagonal from the right, FLA_Syrk writes the
 *      lower triangle of A from B, and FLA_Symmetrize fills in the rest.
 *      NOTE(review): this presumably yields B * diag(1/eigval) * B^T,
 *      i.e. a pseudo-inverse - confirm against the libflame reference.
 *
 * Parameters:
 *   matrix  - in/out buffer of size*size doubles, attached to A.
 *   eigvals - out buffer of `size` doubles, attached to l for FLA_Hevd.
 *   size    - dimension of the square matrix.
 *   cutoff  - entries of `eigvals` below this value are treated as zero.
 *
 * Returns 0 on success, or -1 when the scratch buffer cannot be
 * allocated (in which case libflame is never initialized and `matrix`
 * and `eigvals` are left untouched).
 */
int phonopy_pinv_libflame(double *matrix,
                          double *eigvals,
                          const int size,
                          const double cutoff)
{
    FLA_Obj A, B, l;
    double *inv_eigvals;
    int i;

    inv_eigvals = (double*)malloc(sizeof(double) * size);
    if (inv_eigvals == NULL) {
        /* Bail out before FLA_Init() so there is nothing to tear down. */
        return -1;
    }

    FLA_Init();

    /* Wrap the caller's buffers; no data is copied here. */
    FLA_Obj_create_without_buffer(FLA_DOUBLE, size, size, &A);
    FLA_Obj_attach_buffer(matrix, 0, 0, &A);

    FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l);
    FLA_Obj_attach_buffer(eigvals, 0, 0, &l);

    /* Eigensolver: runs on a copy so A keeps wrapping `matrix`. */
    FLA_Obj_create_copy_of(FLA_NO_TRANSPOSE, A, &B);
    FLA_Hevd(FLA_EVD_WITH_VECTORS, FLA_LOWER_TRIANGULAR, B, l);

    /* Detach l from `eigvals`; it is re-created over inv_eigvals below. */
    FLA_Obj_free_without_buffer(&l);

    for (i = 0; i < size; i++) {
        if (eigvals[i] < cutoff) {
            inv_eigvals[i] = 0;
        } else {
            inv_eigvals[i] = 1.0 / sqrt(eigvals[i]);
        }
    }

    FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l);
    FLA_Obj_attach_buffer(inv_eigvals, 0, 0, &l);

    FLA_Apply_diag_matrix(FLA_RIGHT, FLA_NO_CONJUGATE, l, B);
    FLA_Syrk(FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, B, FLA_ZERO, A);
    FLA_Symmetrize(FLA_LOWER_TRIANGULAR, A);

    /* A and l only borrow their buffers; B owns its own storage. */
    FLA_Obj_free_without_buffer(&A);
    FLA_Obj_free_without_buffer(&l);
    FLA_Obj_free(&B);

    FLA_Finalize();

    free(inv_eigvals);

    return 0;
}
示例4: main
/*
 * Test driver: reads (order, sym, n, b, permutation) from the command
 * line, builds a tensor T with create_psym_tensor(), prints it, and runs
 * test_permute_tensor() on it.
 *
 * If parse_input() or check_errors() reports FLA_FAILURE, Usage() is
 * called and no tensor is created. The library is finalized and 0 is
 * returned on every path.
 */
int main(int argc, char* argv[]){
    dim_t order;
    TLA_sym sym;
    dim_t n[FLA_MAX_ORDER];
    dim_t b[FLA_MAX_ORDER];
    dim_t permutation[FLA_MAX_ORDER];
    FLA_Obj T;

    FLA_Init();

    /* Parse first, then error-check; the second call is skipped when
       parsing already failed. */
    if (parse_input(argc, argv, &order, &sym, n, b, permutation) == FLA_FAILURE
        || check_errors(order, sym, n, b, permutation) == FLA_FAILURE) {
        Usage();
    } else {
        /* Perform the test. */
        create_psym_tensor(order, sym, n, b, &T);
        FLA_Obj_print_matlab("T", T);

        test_permute_tensor(permutation, T);

        /* Release the tensor's buffer, then the object itself. */
        FLA_Obj_blocked_psym_tensor_free_buffer(&T);
        FLA_Obj_free_without_buffer(&T);
    }

    FLA_Finalize();
    return 0;
}
示例5: mexFunction
/*
 * MEX gateway that round-trips a single MATLAB array through a FLAME
 * tensor object: pargin[0] is converted with TLA_mxa_to_tensor(), printed
 * with FLA_Obj_print_matlab(), and converted back into pargout[0] with
 * TLA_tensor_to_mxa().
 *
 * Exactly one input argument is required; any other count is reported
 * through mexErrMsgTxt() before libflame is initialized.
 */
void mexFunction(int nargout, mxArray * pargout[],
                 int nargin, const mxArray * pargin[]){
    FLA_Obj A;

    /* pargin[0] is read below, so it must actually be present. */
    if (nargin < 1)
    {
        mexErrMsgTxt("Not enough input arguments.");
    }
    if (nargin > 1)
    {
        mexErrMsgTxt("Too many input arguments.");
    }

    FLA_Init();

    /* Parse input */
    mexPrintf("Creating tensor\n");
    TLA_mxa_to_tensor(pargin[0], &A);
    FLA_Obj_print_matlab("A", A);

    /* Pass output */
    mexPrintf("Passing C back\n");
    TLA_tensor_to_mxa(A, &(pargout[0]));

    /* NOTE(review): A is never explicitly freed in this function; confirm
       whether TLA_tensor_to_mxa() or FLA_Finalize() takes care of it. */
    mexPrintf("finalizing\n");
    FLA_Finalize();
}
示例6: main
//.........这里部分代码省略.........
if ( pc_str[param_combo][0] == 'l' )
{
FLA_Obj_create( datatype, nb_alg, nb_alg, &A_flat );
FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A );
FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A_save );
FLA_Obj_create( datatype, bm, bn, &T_flat );
FLASH_Obj_create_ext( datatype, bm, bn, 1, &bm, &bn, &T );
FLASH_Obj_create_ext( datatype, bm, n, 1, &bm, &bn, &W );
}
else
{
FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A );
}
FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B );
FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B_ref );
FLA_Obj_create( datatype, nb_alg, 1, &t );
FLASH_Random_matrix( A );
FLASH_Random_matrix( B );
fprintf( stdout, "data_applyq_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
FLASH_Copy( A, A_save );
FLASH_Obj_flatten( A, A_flat );
FLA_QR_blk_external( A_flat, t );
FLASH_Obj_hierarchify( A_flat, A );
time_Apply_Q( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
A, B, B_ref, t, T, W, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
FLASH_Copy( A_save, A );
FLASH_Obj_flatten( A, A_flat );
FLA_QR_UT( A_flat, t, T_flat );
FLASH_Obj_hierarchify( A_flat, A );
FLASH_Obj_hierarchify( T_flat, T );
time_Apply_Q( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
A, B, B_ref, t, T, W, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLASH_Obj_free( &A );
FLA_Obj_free( &A_flat );
FLASH_Obj_free( &B );
FLASH_Obj_free( &B_ref );
FLA_Obj_free( &t );
FLASH_Obj_free( &T );
FLA_Obj_free( &T_flat );
FLASH_Obj_free( &W );
}
fprintf( stdout, "\n" );
}
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_applyq\\_%s', 'fla\\_applyq\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME applyq front-end performance (%s, %s)' );\n",
m_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc applyq_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
FLA_Finalize( );
return 0;
}
示例7: main
//.........这里部分代码省略.........
sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE );
fpp = fopen( output_file_m, "a" );
fprintf( fpp, "%%\n" );
fprintf( fpp, "%% | Matrix Size | FLASH |\n" );
fprintf( fpp, "%% | n x n | GFlops |\n" );
fprintf( fpp, "%% -----------------------------\n" );
fprintf( fpp, "%s_%u = [\n", OUTPUT_FILE, nb_alg );
#endif
FLA_Init();
dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) );
flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) );
FLASH_Queue_set_num_threads( n_threads );
FLASH_Queue_set_sorting( sorting );
FLASH_Queue_set_caching( caching );
FLASH_Queue_set_work_stealing( work_stealing );
FLASH_Queue_set_data_affinity( data_affinity );
for ( i = 0; i < n_trials; i++ )
{
size = begin + i * increment;
FLA_Obj_create( datatype, size, size, 0, 0, &A );
FLA_Obj_create( datatype, size, size, 0, 0, &B );
FLA_Obj_create( datatype, size, 1, 0, 0, &x );
FLA_Obj_create( datatype, size, 1, 0, 0, &b );
FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm );
for ( j = 0; j < n_repeats; j++ )
{
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( x );
FLA_Random_matrix( b );
FLA_Symmetrize( uplo, A );
FLA_Symmetrize( uplo, B );
length = ( double ) FLA_Obj_length( B );
FLA_Add_to_diag( &length, B );
FLA_Symv_external( uplo, FLA_ONE, B, x, FLA_ZERO, b );
FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH );
FLASH_Obj_create_hier_copy_of_flat( B, 1, &nb_alg, &BH );
FLASH_Chol( uplo, BH );
dtime = FLA_Clock();
FLASH_Eig_gest( inv, uplo, AH, BH );
dtime = FLA_Clock() - dtime;
dtimes[j] = dtime;
FLASH_Obj_free( &AH );
FLASH_Obj_free( &BH );
}
dtime = dtimes[0];
for ( j = 1; j < n_repeats; j++ )
dtime = min( dtime, dtimes[j] );
flops[i] = 1.0 * size * size * size / dtime / 1e9;
#ifdef FLA_ENABLE_WINDOWS_BUILD
fprintf( stdout, " %d %6.3f %le\n", size, flops[i], b_norm_value );
#else
fprintf( fpp, " %d %6.3f\n", size, flops[i] );
fprintf( stdout, "Time: %e | GFlops: %6.3f\n", dtime, flops[i] );
fprintf( stdout, "Matrix size: %u x %u | nb_alg: %u\n",
size, size, nb_alg );
fprintf( stdout, "Norm of difference: %le\n\n", b_norm_value );
#endif
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &x );
FLA_Obj_free( &b );
FLA_Obj_free( &b_norm );
}
#ifdef FLA_ENABLE_WINDOWS_BUILD
fprintf( stdout, "];\n\n" );
#else
fprintf( fpp, "];\n" );
fflush( fpp );
fclose( fpp );
#endif
FLA_free( dtimes );
FLA_free( flops );
FLA_Finalize();
return 0;
}
示例8: main
//.........这里部分代码省略.........
sprintf( k_dim_tag, "k%dp", 1 );
}
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
FLASH_Queue_set_num_threads( n_threads );
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
k = k_input;
if( m < 0 ) m = p / abs(m_input);
if( k < 0 ) k = p / abs(k_input);
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
// If transposing A, switch dimensions.
if ( pc_str[param_combo][1] == 'n' )
{
FLASH_Obj_create( datatype, m, k, 1, &nb_alg, &A );
FLASH_Obj_create( datatype, m, k, 1, &nb_alg, &B );
}
else
{
FLASH_Obj_create( datatype, k, m, 1, &nb_alg, &A );
FLASH_Obj_create( datatype, k, m, 1, &nb_alg, &B );
}
FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &C );
FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &C_ref );
FLASH_Random_matrix( A );
FLASH_Random_matrix( B );
FLASH_Random_matrix( C );
fprintf( stdout, "data_her2k_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Her2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Her2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLASH_Obj_free( &A );
FLASH_Obj_free( &B );
FLASH_Obj_free( &C );
FLASH_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_her2k_%s( :,1 ), data_her2k_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_her2k_%s( :,1 ), data_her2k_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_her2k\\_%s', 'fla\\_her2k\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME her2k front-end performance (%s, %s)' );\n",
m_dim_desc, k_dim_desc );
fprintf( stdout, "print -depsc her2k_front_%s_%s.eps\n", m_dim_tag, k_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例9: main
//.........这里部分代码省略.........
if ( pc_str[param_combo][0] == 'c' ||
pc_str[param_combo][1] == 'c' )
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_COMPLEX;
else
datatype = FLA_DOUBLE_COMPLEX;
}
else
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_FLOAT;
else
datatype = FLA_DOUBLE;
}
// If transposing A, switch dimensions.
if ( pc_str[param_combo][0] == 'n' )
FLA_Obj_create( datatype, m, k, 0, 0, &A );
else
FLA_Obj_create( datatype, k, m, 0, 0, &A );
// If transposing B, switch dimensions.
if ( pc_str[param_combo][1] == 'n' )
FLA_Obj_create( datatype, k, n, 0, 0, &B );
else
FLA_Obj_create( datatype, n, k, 0, 0, &B );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( C );
FLA_Copy_external( C, C_ref );
fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n );
fflush( stdout );
time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
/*
time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
*/
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_gemm\\_%s', 'fla\\_gemm\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );\n",
m_dim_desc, k_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例10: main
//.........这里部分代码省略.........
FLA_Obj_create( datatype, n, n, 0, 0, &B );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX )
{
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &scale );
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
}
else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX )
{
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &scale );
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
}
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, B );
FLA_Random_matrix( C );
FLA_Norm1( A, norm );
FLA_Shift_diag( FLA_NO_CONJUGATE, norm, A );
FLA_Norm1( B, norm );
if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) )
FLA_Negate( norm );
FLA_Shift_diag( FLA_NO_CONJUGATE, norm, B );
time_Sylv_nn( 0, FLA_ALG_REFERENCE, n_repeats, m, n, nb_alg,
isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops );
fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
fflush( stdout );
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Sylv_nn( variant, FLA_ALG_UNB_OPT, n_repeats, m, n, nb_alg,
isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Sylv_nn( variant, FLA_ALG_BLOCKED, n_repeats, m, n, nb_alg,
isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
}
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
FLA_Obj_free( &scale );
FLA_Obj_free( &norm );
fprintf( stdout, "\n" );
}
/* Print the MATLAB commands to plot the data */
/* Delete all existing figures */
fprintf( stdout, "figure;\n" );
/* Plot the performance of the reference implementation */
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
/* Indicate that you want to add to the existing plot */
fprintf( stdout, "hold on;\n" );
/* Plot the data for the other numbers of threads */
for ( i = 1; i <= n_variants; i++ ){
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
i, i, colors[ i-1 ], ticks[ i-1 ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i <= n_variants; i++ )
fprintf( stdout, "'FLAME var%d', ... \n", i );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME sylv\\_nn performance (%s)' );\n",
m_dim_desc );
fprintf( stdout, "print -depsc sylv_nn_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
FLA_Finalize( );
}
示例11: main
//.........这里部分代码省略.........
}
else if( n_input == -1 ) {
sprintf( n_dim_desc, "n = p" );
sprintf( n_dim_tag, "n%dp", 1 );
}
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
n = n_input;
if( m < 0 ) m = p / abs(m_input);
if( n < 0 ) n = p / abs(n_input);
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
// If multiplying A on the left, A is m x m; ...on the right, A is n x n.
if ( pc_str[param_combo][0] == 'l' )
FLA_Obj_create( datatype, m, m, 0, 0, &A );
else
FLA_Obj_create( datatype, n, n, 0, 0, &A );
FLA_Obj_create( datatype, m, n, 0, 0, &B );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( C );
FLA_Copy_external( C, C_ref );
fprintf( stdout, "data_symm_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Symm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Symm( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_symm\\_%s', 'fla\\_symm\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME symm front-end performance (%s, %s)' );\n",
m_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc symm_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例12: main
//.........这里部分代码省略.........
FLA_Random_unitary_matrix( Q );
//FLA_Fill_with_uniform_dist( FLA_ONE, l );
//FLA_Fill_with_inverse_dist( FLA_ONE, l );
FLA_Fill_with_geometric_dist( alpha, l );
{
FLA_Copy( Q, Ql );
FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, Ql );
FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_ONE, Ql, Q, FLA_ZERO, A );
FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Copy( A, A_orig );
}
FLA_Set( FLA_ZERO, l );
FLA_Set( FLA_ZERO, Q );
FLA_Tridiag_UT_create_T( A, &TT );
FLA_Tridiag_UT( FLA_LOWER_TRIANGULAR, A, TT );
FLA_Tridiag_UT_realify( FLA_LOWER_TRIANGULAR, A, r );
FLA_Tridiag_UT_extract_diagonals( FLA_LOWER_TRIANGULAR, A, d, e );
FLA_Tridiag_UT_form_Q( FLA_LOWER_TRIANGULAR, A, TT );
FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );
FLA_Obj_free( &TT );
time_Tevd_v( 0, FLA_ALG_REFERENCE, n_repeats, m, k_accum, b_alg, n_iter_max,
A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops );
fprintf( stdout, "data_REFq( %d, 1:3 ) = [ %d %6.3lf %9.2e %6.2le %6.2le ]; \n", i, p, gflops, dtime, diff1, diff2 );
fflush( stdout );
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Tevd_v( variant, FLA_ALG_UNB_OPT, n_repeats, m, k_accum, b_alg, n_iter_max,
A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops );
fprintf( stdout, "%6.3lf %9.2e %6.2le %6.2le ", gflops, dtime, diff1, diff2 );
fflush( stdout );
fprintf( stdout, "];\n" );
fflush( stdout );
}
fprintf( stdout, "\n" );
FLA_Obj_free( &A );
FLA_Obj_free( &A_orig );
FLA_Obj_free( &Q );
FLA_Obj_free( &Ql );
FLA_Obj_free( &G );
FLA_Obj_free( &W2 );
FLA_Obj_free( &r );
FLA_Obj_free( &l );
FLA_Obj_free( &d );
FLA_Obj_free( &e );
FLA_Obj_free( &R );
FLA_Obj_free( &alpha );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
fprintf( stdout, "hold on;\n" );
for ( i = 1; i <= n_variants; i++ ) {
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
i, i, colors[ i-1 ], ticks[ i-1 ] );
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
i, i, colors[ i-1 ], ticks[ i-1 ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i < n_variants; i++ )
fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME Hevd_lv performance (%s, %s)' );\n",
m_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc tridiag_%s_%s.eps\n", m_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例13: main
//.........这里部分代码省略.........
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
if( m < 0 ) m = p / f2c_abs(m_input);
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, 1, 0, 0, &b );
FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );
/*
FLA_Obj_create( datatype, m, m, m, 1, &A );
FLA_Obj_create( datatype, m, 1, 1, 1, &b );
FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );
*/
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Random_matrix( b );
FLA_Copy_external( b, b_orig );
/*
time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
fflush( stdout );
*/
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Trinv_un( variant, FLA_ALG_UNBLOCKED, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Trinv_un( variant, FLA_ALG_UNB_OPT, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Trinv_un( variant, FLA_ALG_BLOCKED, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
}
FLA_Obj_free( &A );
FLA_Obj_free( &b );
FLA_Obj_free( &b_orig );
FLA_Obj_free( &norm );
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
for ( i = 1; i <= n_variants; i++ ){
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
variant, variant, colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i <= n_variants; i++ )
fprintf( stdout, "'FLAME var%d', ... \n", i );
fprintf( stdout, "'Location', 'SouthWest' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME trinv\\_u performance (%s)' );\n",
m_dim_desc );
fprintf( stdout, "print -depsc trinv_l_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
}
示例14: main
//.........这里部分代码省略.........
if( m < 0 ) m = p / f2c_abs(m_input);
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, m, 0, 0, &Y );
FLA_Obj_create( datatype, m, m, 0, 0, &B );
FLA_Random_spd_matrix( uplo, A );
FLA_Hermitianize( uplo, A );
FLA_Random_spd_matrix( uplo, B );
FLA_Chol( uplo, B );
/*
time_Eig_gest_nu( 0, FLA_ALG_REFERENCE, n_repeats, p, b_alg,
inv, uplo, A, B, &dtime, &diff, &gflops );
fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
fflush( stdout );
*/
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_UNBLOCKED, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_UNB_OPT, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_BLOCKED, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
}
FLA_Obj_free( &A );
FLA_Obj_free( &Y );
FLA_Obj_free( &B );
fprintf( stdout, "\n" );
}
/*
// Print the MATLAB commands to plot the data
// Delete all existing figures
fprintf( stdout, "figure;\n" );
// Plot the performance of the reference implementation
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
// Indicate that you want to add to the existing plot
fprintf( stdout, "hold on;\n" );
// Plot the data for the other numbers of threads
for ( i = 1; i <= n_variants; i++ ){
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
i, i, colors[ i-1 ], ticks[ i-1 ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i <= n_variants; i++ )
fprintf( stdout, "'FLAME var%d', ... \n", i );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME chol\\_l performance (%s)' );\n",
m_dim_desc );
fprintf( stdout, "print -depsc chol_l_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例15: main
/*
 * Interactive driver for hierarchical (FLASH) objects.
 *
 * Reads from stdin: a hierarchy depth, one blocksize per level, and the
 * matrix dimensions m and n. It then wraps a fixed stack buffer holding
 * 0, 1, 2, ... as an m x n hierarchical object Ah (row stride 1, column
 * stride m), creates a conformal object Bh, copies Ah into Bh, and shows
 * both. Prompts and echoes are prefixed with '%'.
 *
 * All input is validated before use, because the values index or size
 * fixed-length stack arrays:
 *   - depth must be in [0, MAX_DEPTH]  (blocksize[] has MAX_DEPTH slots);
 *   - every blocksize must be >= 1;
 *   - m and n must be >= 1 with m * n <= BUFFER_LEN (size of buffer[]).
 *
 * Returns 0 on success and 1 when the input is missing, malformed, or
 * out of range. FLA_Finalize() is called on every path.
 *
 * NOTE(review): Ah and Bh are not freed before FLA_Finalize(), matching
 * the original driver - confirm whether explicit frees are wanted.
 */
int main( int argc, char *argv[] )
{
  enum { MAX_DEPTH = 3, BUFFER_LEN = 64 };

  int m_input, n_input;
  int m, n, rs, cs;
  int i;
  int datatype;
  int blocksize[MAX_DEPTH];
  int depth;
  int status = 1;
  double buffer[BUFFER_LEN];
  FLA_Obj Ah, Bh;

  FLA_Init();

  fprintf( stdout, "%c Enter hierarchy depth:", '%' );
  if ( scanf( "%d", &depth ) != 1 || depth < 0 || depth > MAX_DEPTH )
  {
    fprintf( stderr, "error: hierarchy depth must be an integer in [0, %d]\n",
             MAX_DEPTH );
    goto finalize;
  }
  fprintf( stdout, "%c %d\n", '%', depth );

  for ( i = 0; i < depth; ++i )
  {
    fprintf( stdout, "%c Enter blocksize %d:", '%', i );
    if ( scanf( "%d", &blocksize[i] ) != 1 || blocksize[i] < 1 )
    {
      fprintf( stderr, "error: blocksize %d must be a positive integer\n", i );
      goto finalize;
    }
    fprintf( stdout, "%c %d\n", '%', blocksize[i] );
  }

  fprintf( stdout, "%c enter m n: ", '%' );
  if ( scanf( "%d%d", &m_input, &n_input ) != 2 )
  {
    fprintf( stderr, "error: expected two integers for m and n\n" );
    goto finalize;
  }
  fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );

  /* The attached buffer must hold m * n elements; the division form of
     the check avoids overflowing the product. */
  if ( m_input < 1 || n_input < 1 ||
       m_input > BUFFER_LEN || n_input > BUFFER_LEN / m_input )
  {
    fprintf( stderr, "error: m and n must be positive with m * n <= %d\n",
             BUFFER_LEN );
    goto finalize;
  }

  datatype = FLA_DOUBLE;

  m  = m_input;
  n  = n_input;
  rs = 1;
  cs = m_input;

  for ( i = 0; i < BUFFER_LEN; i++ ) buffer[i] = ( double ) i;

  /* Ah borrows the stack buffer; Bh gets its own conformal storage. */
  FLASH_Obj_create_without_buffer( datatype, m, n, depth, blocksize, &Ah );
  FLASH_Obj_attach_buffer( buffer, rs, cs, &Ah );

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, Ah, &Bh );

  FLASH_Copy( Ah, Bh );

  FLASH_Obj_show( "", Ah, "%11.4e", "" );
  FLASH_Obj_show( "", Bh, "%11.4e", "" );

  status = 0;

finalize:
  FLA_Finalize();

  return status;
}