本文整理匯總了C++中FLA_Obj_free函數的典型用法代碼示例。如果您正苦於以下問題：C++ FLA_Obj_free函數的具體用法？C++ FLA_Obj_free怎麼用？C++ FLA_Obj_free使用的例子？那麼，這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了FLA_Obj_free函數的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的C++代碼示例。
示例1: main
//.........這裏部分代碼省略.........
diff = FLA_Max_elemwise_diff( C, Cref );
printf( "data_unb_var8( %d, 1:3 ) = [ %d %le %le];\n", i, n,
gflops / dtime_best, diff );
fflush( stdout );
#endif
#if TEST_BLK_VAR8==TRUE
/* Variant 4 blocked */
for ( irep=0; irep<nrepeats; irep++ ){
FLA_Copy( Cold, C );
dtime = FLA_Clock();
Symm_blk_var8( A, B, C, nb_alg );
dtime = FLA_Clock() - dtime;
if ( irep == 0 )
dtime_best = dtime;
else
dtime_best = ( dtime < dtime_best ? dtime : dtime_best );
}
diff = FLA_Max_elemwise_diff( C, Cref );
printf( "data_blk_var8( %d, 1:3 ) = [ %d %le %le];\n", i, n,
gflops / dtime_best, diff );
fflush( stdout );
#endif
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &Cref );
FLA_Obj_free( &Cold );
printf( "\n" );
i++;
}
/* Print the MATLAB commands to plot the data */
/* Delete all existing figures */
printf( "close all\n" );
/* Plot the performance of FLAME */
printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), 'k--' ); \n" );
/* Indicate that you want to add to the existing plot */
printf( "hold on\n" );
/* Plot the performance of the reference implementation */
// printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), 'k-' ); \n" );
/* Plot the performance of your implementations */
#if TEST_UNB_VAR1==TRUE
printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), 'r-.' ); \n" );
#endif
#if TEST_UNB_VAR2==TRUE
printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), 'g-.' ); \n" );
#endif
#if TEST_UNB_VAR3==TRUE
示例2: main
//.........這裏部分代碼省略.........
if ( pc_str[param_combo][0] == 'c' ||
pc_str[param_combo][1] == 'c' )
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_COMPLEX;
else
datatype = FLA_DOUBLE_COMPLEX;
}
else
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_FLOAT;
else
datatype = FLA_DOUBLE;
}
// If transposing A, switch dimensions.
if ( pc_str[param_combo][0] == 'n' )
FLA_Obj_create( datatype, m, k, 0, 0, &A );
else
FLA_Obj_create( datatype, k, m, 0, 0, &A );
// If transposing B, switch dimensions.
if ( pc_str[param_combo][1] == 'n' )
FLA_Obj_create( datatype, k, n, 0, 0, &B );
else
FLA_Obj_create( datatype, n, k, 0, 0, &B );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( C );
FLA_Copy_external( C, C_ref );
fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n );
fflush( stdout );
time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
/*
time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
*/
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_gemm\\_%s', 'fla\\_gemm\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );\n",
m_dim_desc, k_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例3: main
//.........這裏部分代碼省略.........
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
if( m < 0 ) m = p / abs(m_input);
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, 1, 0, 0, &b );
FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );
/*
FLA_Obj_create( datatype, m, m, m, 1, &A );
FLA_Obj_create( datatype, m, 1, 1, 1, &b );
FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );
*/
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_UNIT_DIAG, A );
FLA_Random_matrix( b );
FLA_Copy_external( b, b_orig );
/*
time_Trinv_uu( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
fflush( stdout );
*/
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Trinv_uu( variant, FLA_ALG_UNBLOCKED, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Trinv_uu( variant, FLA_ALG_UNB_OPT, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Trinv_uu( variant, FLA_ALG_BLOCKED, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
}
FLA_Obj_free( &A );
FLA_Obj_free( &b );
FLA_Obj_free( &b_orig );
FLA_Obj_free( &norm );
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
for ( i = 1; i <= n_variants; i++ ){
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
variant, variant, colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i <= n_variants; i++ )
fprintf( stdout, "'FLAME var%d', ... \n", i );
fprintf( stdout, "'Location', 'SouthWest' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME trinv\\_u performance (%s)' );\n",
m_dim_desc );
fprintf( stdout, "print -depsc trinv_l_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
}
示例4: FLA_Hevd_lv_var3_components
FLA_Error FLA_Hevd_lv_var3_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg,
double* dtime_tred, double* dtime_tevd, double* dtime_appq )
{
FLA_Error r_val = FLA_SUCCESS;
FLA_Uplo uplo = FLA_LOWER_TRIANGULAR;
FLA_Datatype dt;
FLA_Datatype dt_real;
FLA_Datatype dt_comp;
FLA_Obj T, r, d, e, G;
FLA_Obj d0, e0, ls, pu;
dim_t mn_A;
dim_t n_G = k_accum;
double dtime_temp;
mn_A = FLA_Obj_length( A );
dt = FLA_Obj_datatype( A );
dt_real = FLA_Obj_datatype_proj_to_real( A );
dt_comp = FLA_Obj_datatype_proj_to_complex( A );
// If the matrix is a scalar, then the EVD is easy.
if ( mn_A == 1 )
{
FLA_Copy( A, l );
FLA_Set( FLA_ONE, A );
return FLA_SUCCESS;
}
// Create a matrix to hold block Householder transformations.
FLA_Tridiag_UT_create_T( A, &T );
// Create a vector to hold the realifying scalars.
FLA_Obj_create( dt, mn_A, 1, 0, 0, &r );
// Create vectors to hold the diagonal and sub-diagonal.
FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d );
FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e );
FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d0 );
FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e0 );
FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &pu );
FLA_Obj_create( FLA_INT, mn_A, 1, 0, 0, &ls );
FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G );
dtime_temp = FLA_Clock();
{
// Reduce the matrix to tridiagonal form.
FLA_Tridiag_UT( uplo, A, T );
}
*dtime_tred = FLA_Clock() - dtime_temp;
// Apply scalars to rotate elements on the sub-diagonal to the real domain.
FLA_Tridiag_UT_realify( uplo, A, r );
// Extract the diagonal and sub-diagonal from A.
FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e );
dtime_temp = FLA_Clock();
{
// Form Q, overwriting A.
FLA_Tridiag_UT_form_Q( uplo, A, T );
}
*dtime_appq = FLA_Clock() - dtime_temp;
// Apply the scalars in r to Q.
FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );
// Find the eigenvalues only.
FLA_Copy( d, d0 ); FLA_Copy( e, e0 );
//r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );
{
int info;
double* buff_d = FLA_DOUBLE_PTR( d0 );
double* buff_e = FLA_DOUBLE_PTR( e0 );
dsterf_( &mn_A, buff_d, buff_e, &info );
}
FLA_Sort( FLA_FORWARD, d0 );
FLA_Set( FLA_ZERO, ls );
FLA_Set( FLA_ZERO, pu );
dtime_temp = FLA_Clock();
{
// Perform an eigenvalue decomposition on the tridiagonal matrix.
r_val = FLA_Tevd_v_opt_var3( n_iter_max, d, e, d0, ls, pu, G, A, b_alg );
}
*dtime_tevd = FLA_Clock() - dtime_temp;
//FLA_Obj_show( "var4: e", e, "%22.15e", "" );
// Copy the converged eigenvalues to the output vector.
FLA_Copy( d, l );
// Sort the eigenvalues and eigenvectors in ascending order.
FLA_Sort_evd( FLA_FORWARD, l, A );
//FLA_Obj_show( "var4: d", l, "%22.15e", "" );
//FLA_Obj_show( "var4: A", A, "%8.1e + %8.1e", "" );
//FLA_Copy( d0, l );
FLA_Obj_free( &T );
FLA_Obj_free( &r );
//.........這裏部分代碼省略.........
示例5: time_Trsm_lln
//.........這裏部分代碼省略.........
FLA_ONE, A, C );
break;
case 1:{
// Time variant 1
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Trsm_lln_unb_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C );
break;
case FLA_ALG_BLOCKED:
FLA_Trsm_lln_blk_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
// Time variant 2
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Trsm_lln_unb_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C );
break;
case FLA_ALG_BLOCKED:
FLA_Trsm_lln_blk_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
// Time variant 3
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Trsm_lln_unb_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C );
break;
case FLA_ALG_BLOCKED:
FLA_Trsm_lln_blk_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );
break;
default:
printf("trouble\n");
}
break;
}
case 4:{
// Time variant 4
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Trsm_lln_unb_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C );
break;
case FLA_ALG_BLOCKED:
FLA_Trsm_lln_blk_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );
break;
default:
printf("trouble\n");
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
FLA_Cntl_obj_free( cntl_trsm_var );
FLA_Cntl_obj_free( cntl_trsm_blas );
FLA_Cntl_obj_free( cntl_gemm_blas );
FLA_Blocksize_free( bp );
if ( variant == 0 )
{
FLA_Copy_external( C, C_ref );
*diff = 0.0;
}
else
{
*diff = FLA_Max_elemwise_diff( C, C_ref );
}
*gflops = 1.0 *
FLA_Obj_length( C ) *
FLA_Obj_width( C ) *
FLA_Obj_width( A ) /
dtime_old /
1.0e9;
*dtime = dtime_old;
FLA_Copy_external( C_old, C );
FLA_Obj_free( &C_old );
}
示例6: REF_Svdd_uv_components
//.........這裏部分代碼省略.........
*dtime_qrfa = 0.0;
*dtime_gemm = 0.0;
}
else
{
FLA_Obj AT,
AB;
FLA_Obj UL, UR;
FLA_Part_2x1( A, &AT,
&AB, n_A, FLA_TOP );
FLA_Part_1x2( U, &UL, &UR, n_A, FLA_LEFT );
// Create a temporary n-by-n matrix R.
FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W );
dtime_temp = FLA_Clock();
{
// Perform a QR factorization.
FLA_QR_blk_external( A, tq );
FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL );
FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, A );
}
*dtime_qrfa = FLA_Clock() - dtime_temp;
dtime_temp = FLA_Clock();
{
// Form Q.
FLA_QR_form_Q_external( U, tq );
}
*dtime_appq = FLA_Clock() - dtime_temp;
dtime_temp = FLA_Clock();
{
// Reduce R to bidiagonal form.
FLA_Bidiag_blk_external( AT, tu, tv );
FLA_Bidiag_UT_extract_diagonals( A, d, eT );
}
*dtime_bred = FLA_Clock() - dtime_temp;
dtime_temp = FLA_Clock();
{
// Divide-and-conquor algorithm.
FLA_Bsvdd_external( uplo, d, e, Ur, Vr );
}
*dtime_bsvd = FLA_Clock() - dtime_temp;
dtime_temp = FLA_Clock();
{
// Form U in W.
FLA_Copy_external( Ur, W );
FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, AT, tu, W );
// Form V.
FLA_Copy_external( Vr, V );
FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, AT, tv, V );
}
*dtime_appq += FLA_Clock() - dtime_temp;
dtime_temp = FLA_Clock();
{
// Multiply R into U, storing the result in A and then copying
// back to U.
FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_ONE, UL, W, FLA_ZERO, A );
FLA_Copy( A, UL );
}
*dtime_gemm = FLA_Clock() - dtime_temp;
// Free R.
FLA_Obj_free( &W );
}
}
else
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
}
// Copy singular values to output vector.
FLA_Copy( d, s );
// Sort singular values and vectors.
FLA_Sort_svd( FLA_BACKWARD, s, U, V );
FLA_Obj_free( &tq );
FLA_Obj_free( &tu );
FLA_Obj_free( &tv );
FLA_Obj_free( &d );
FLA_Obj_free( &e );
FLA_Obj_free( &Ur );
FLA_Obj_free( &Vr );
return FLA_SUCCESS;
}
示例7: main
//.........這裏部分代碼省略.........
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
// Determine datatype based on trans argument.
if ( pc_str[param_combo][2] == 'h' )
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_COMPLEX;
else
datatype = FLA_DOUBLE_COMPLEX;
}
else
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_FLOAT;
else
datatype = FLA_DOUBLE;
}
// If multiplying A on the left, A is m x m; ...on the right, A is n x n.
if ( pc_str[param_combo][0] == 'l' )
FLA_Obj_create( datatype, m, m, 0, 0, &A );
else
FLA_Obj_create( datatype, n, n, 0, 0, &A );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
if ( pc_str[param_combo][1] == 'l' )
{
FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Random_matrix( C );
}
else
{
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Random_matrix( C );
}
fprintf( stdout, "data_trsm_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Trsm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
A, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
/*
time_Trsm( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
A, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
*/
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_trsm\\_%s', 'fla\\_trsm\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME trsm front-end performance (%s, %s)' );\n",
m_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc trsm_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例8: time_Gemm_nn
//.........這裏部分代碼省略.........
break;
default:
printf("trouble\n");
}
break;
}
case 15:{
// Time variant 1->5
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 31:{
// Time variant 3->1
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 35:{
// Time variant 3->5
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 51:{
// Time variant 5->1
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 53:{
// Time variant 5->3
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
}
if ( irep == 0 )
dtime_old = FLA_Clock() - *dtime;
else{
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
}
if ( variant == 0 ){
FLA_Copy_external( C, Cref );
*diff = 0.0;
}
else{
*diff = FLA_Max_elemwise_diff( C, Cref );
//FLA_Obj_show( "C:", C, "%f", "\n");
}
*gflops = 2.0 *
FLA_Obj_length( C ) *
FLA_Obj_width( C ) *
FLA_Obj_width( A ) /
dtime_old /
1e9;
*dtime = dtime_old;
FLA_Copy_external( Cold, C );
FLA_Obj_free( &Cold );
}
示例9: FLA_Gemm_nn_omp_var15
FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl )
{
FLA_Obj AT, A0,
AB, A1,
A2;
FLA_Obj CT, C0,
CB, C1,
C2;
FLA_Obj AL, AR, A10, A11, A12;
FLA_Obj BT, B0,
BB, B1,
B2;
FLA_Obj C1_local;
int i, j, lock_ldim, lock_i;
int b_m, b_k;
FLA_Part_2x1( A, &AT,
&AB, 0, FLA_TOP );
FLA_Part_2x1( C, &CT,
&CB, 0, FLA_TOP );
#pragma intel omp parallel taskq
{
while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
{
b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x1_to_3x1( AT, &A0,
/* ** */ /* ** */
&A1,
AB, &A2, b_m, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( CT, &C0,
/* ** */ /* ** */
&C1,
CB, &C2, b_m, FLA_BOTTOM );
/*------------------------------------------------------------*/
/* C1 = alpha * A1 * B + C1; */
FLA_Part_1x2( A1, &AL, &AR, 0, FLA_LEFT );
FLA_Part_2x1( B, &BT,
&BB, 0, FLA_TOP );
while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
{
b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
// Get the index of the current partition.
// FIX THIS: need + b_m - 1 or something like this
//j = FLA_Obj_length( CT ) / b_m;
//i = FLA_Obj_width( AL ) / b_k;
//lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads());
lock_i = FLA_Obj_length( CT ) / b_m;
FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A10, /**/ &A11, &A12,
b_k, FLA_RIGHT );
FLA_Repart_2x1_to_3x1( BT, &B0,
/* ** */ /* ** */
&B1,
BB, &B2, b_k, FLA_BOTTOM );
/*------------------------------------------------------------*/
/* C1 = alpha * A11 * B1 + C1; */
//// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
//// alpha, A11, B1, FLA_ONE, C1 );
#pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local )
{
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local );
FLA_Obj_set_to_zero( C1_local );
/* C1_local = alpha * A1 * B11 + C1_local; */
FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
alpha, A11, B1, FLA_ONE, C1_local );
// Acquire lock[i] (the lock for C1).
omp_set_lock( &fla_omp_lock[lock_i] );
/* C1 += C1_local */
FLA_Axpy_external( FLA_ONE, C1_local, C1 );
//FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 );
//FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );
//REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );
// Release lock[i] (the lock for C1).
omp_unset_lock( &fla_omp_lock[lock_i] );
FLA_Obj_free( &C1_local );
}
//.........這裏部分代碼省略.........
示例10: time_Syrk_ln
//.........這裏部分代碼省略.........
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var2( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var2( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var2( A, C );
break;
case FLA_ALG_OPENMP_2LOOPSPLUS:
FLA_Syrk_ln_omp2x_var2( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
// Time variant 3
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var3( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var3( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var3( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 4:{
// Time variant 4
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var4( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var4( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var4( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 5:{
// Time variant 5
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var5( A, C );
break;
default:
printf("trouble\n");
}
break;
}
}
if ( irep == 0 )
dtime_old = FLA_Clock() - *dtime;
else{
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
}
if ( variant == 0 ){
FLA_Copy_external( C, C_ref );
*diff = 0.0;
}
else{
*diff = FLA_Max_elemwise_diff( C, C_ref );
//FLA_Obj_show( "C:", C, "%f", "\n");
}
*gflops = 1.0 *
FLA_Obj_length( A ) *
FLA_Obj_length( A ) *
FLA_Obj_width( A ) /
dtime_old /
1e9;
*dtime = dtime_old;
FLA_Copy_external( C_old, C );
FLA_Obj_free( &C_old );
}
示例11: main
int main( int argc, char** argv ) {
FLA_Datatype testtype = TESTTYPE;
dim_t m;
FLA_Obj A;
FLA_Obj a1, b1, r1;
FLA_Obj a2, b2, r2;
FLA_Uplo uplo;
FLA_Error init_result;
if ( argc == 3 ) {
m = atoi(argv[1]);
uplo = ( atoi(argv[2]) == 1 ? FLA_UPPER_TRIANGULAR : FLA_LOWER_TRIANGULAR );
} else {
fprintf(stderr, " \n");
fprintf(stderr, "Usage: %s m uplo\n", argv[0]);
fprintf(stderr, " m : test matrix length\n");
fprintf(stderr, " uplo : 0) lower, 1) upper\n");
fprintf(stderr, " \n");
return -1;
}
if ( m == 0 )
return 0;
FLA_Init_safe( &init_result );
// Test matrix A
FLA_Obj_create( testtype, m, m, 0, 0, &A );
FLA_Random_spd_matrix( uplo, A );
FLA_Hermitianize( uplo, A );
FLA_Obj_fshow( stdout, "- A -", A, "% 6.4e", "--" );
FLA_Obj_create( testtype, m, 1, 0, 0, &a1 );
FLA_Obj_create( testtype, m, 1, 0, 0, &a2 );
if ( m > 1 ) {
FLA_Obj_create( testtype, m-1, 1, 0, 0, &b1 );
FLA_Obj_create( testtype, m-1, 1, 0, 0, &b2 );
}
FLA_Obj_create( testtype, m, 1, 0, 0, &r1 );
FLA_Obj_create( testtype, m, 1, 0, 0, &r2 );
// Mine
FLA_Tridiag_UT_extract_diagonals( uplo, A, a1, b1 );
FLA_Obj_fshow( stdout, "- a1 -", a1, "% 6.4e", "--" );
if ( m > 1 ) FLA_Obj_fshow( stdout, "- b1 -", b1, "% 6.4e", "--" );
FLA_Tridiag_UT_realify_subdiagonal( b1, r1 );
if ( m > 1 ) FLA_Obj_fshow( stdout, "- b1 realified -", b1, "% 6.4e", "--" );
FLA_Obj_fshow( stdout, "- r1 -", r1, "% 6.4e", "--" );
// Field
FLA_Tridiag_UT_realify( uplo, A, r2 );
FLA_Tridiag_UT_extract_diagonals( uplo, A, a2, b2 );
FLA_Obj_fshow( stdout, "- a2 -", a2, "% 6.4e", "--" );
if ( m > 1 ) FLA_Obj_fshow( stdout, "- b2 realified -", b2, "% 6.4e", "--" );
FLA_Obj_fshow( stdout, "- r2 -", r2, "% 6.4e", "--" );
printf(" diff_a = %e\n", FLA_Max_elemwise_diff( a1, a2 ));
if ( m > 1 ) printf(" diff_b = %e\n", FLA_Max_elemwise_diff( b1, b2 ));
printf(" diff_rL = %e\n", FLA_Max_elemwise_diff( r1, r2 ));
FLA_Obj_fshow( stdout, "- A realified-", A, "% 6.4e", "--" );
FLA_Obj_free( &r2 );
FLA_Obj_free( &r1 );
if ( m > 1 ) {
FLA_Obj_free( &b2 );
FLA_Obj_free( &b1 );
}
FLA_Obj_free( &a2 );
FLA_Obj_free( &a1 );
FLA_Obj_free( &A );
FLA_Finalize_safe( init_result );
}
示例12: main
//.........這裏部分代碼省略.........
sprintf( n_dim_desc, "n = %d", n_input );
sprintf( n_dim_tag, "n%dc", n_input);
}
else if( n_input < -1 ) {
sprintf( n_dim_desc, "n = p/%d", -n_input );
sprintf( n_dim_tag, "n%dp", -n_input );
}
else if( n_input == -1 ) {
sprintf( n_dim_desc, "n = p" );
sprintf( n_dim_tag, "n%dp", 1 );
}
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
n = n_input;
if( m < 0 ) m = p / abs(m_input);
if( n < 0 ) n = p / abs(n_input);
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) {
if ( pc_str[param_combo][0] == 'n' || pc_str[param_combo][0] == 'c' )
FLA_Obj_create( datatype, m, n, &A );
else
FLA_Obj_create( datatype, n, m, &A );
FLA_Obj_create( datatype, m, n, &C );
FLA_Obj_create( datatype, m, n, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( C );
FLA_Copy_external( C, C_ref );
fprintf( stdout, "data_axpyt_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Axpyt( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
A, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Axpyt( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
A, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_axpyt\\_%s', 'fla\\_axpyt\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME axpyt front-end performance (%s, %s)' );\n",
m_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc axpyt_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
FLA_Finalize( );
return 0;
}
示例13: main
//.........這裏部分代碼省略.........
sprintf( k_dim_desc, "k = p" );
sprintf( k_dim_tag, "k%dp", 1 );
}
//datatype = FLA_FLOAT;
datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
//datatype = FLA_DOUBLE_COMPLEX;
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
k = k_input;
if( m < 0 ) m = p / f2c_abs(m_input);
if( k < 0 ) k = p / f2c_abs(k_input);
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
// If transposing A, switch dimensions.
if ( pc_str[param_combo][1] == 'n' )
{
FLA_Obj_create( datatype, m, k, 0, 0, &A );
FLA_Obj_create( datatype, m, k, 0, 0, &B );
}
else
{
FLA_Obj_create( datatype, k, m, 0, 0, &A );
FLA_Obj_create( datatype, k, m, 0, 0, &B );
}
FLA_Obj_create( datatype, m, m, 0, 0, &C );
FLA_Obj_create( datatype, m, m, 0, 0, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( C );
fprintf( stdout, "data_syr2k_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Syr2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
/*
time_Syr2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
*/
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_syr2k\\_%s', 'fla\\_syr2k\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME syr2k front-end performance (%s, %s)' );\n",
m_dim_desc, k_dim_desc );
fprintf( stdout, "print -depsc syr2k_front_%s_%s.eps\n", m_dim_tag, k_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例14: main
//.........這裏部分代碼省略.........
if( m < 0 ) m = p / f2c_abs(m_input);
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, m, 0, 0, &Y );
FLA_Obj_create( datatype, m, m, 0, 0, &B );
FLA_Random_spd_matrix( uplo, A );
FLA_Hermitianize( uplo, A );
FLA_Random_spd_matrix( uplo, B );
FLA_Chol( uplo, B );
/*
time_Eig_gest_nu( 0, FLA_ALG_REFERENCE, n_repeats, p, b_alg,
inv, uplo, A, B, &dtime, &diff, &gflops );
fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
fflush( stdout );
*/
for ( variant = 1; variant <= n_variants; variant++ ){
fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_UNBLOCKED, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_UNB_OPT, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Eig_gest_nu( variant, FLA_ALG_BLOCKED, n_repeats, p, b_alg,
inv, uplo, A, Y, B, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
}
FLA_Obj_free( &A );
FLA_Obj_free( &Y );
FLA_Obj_free( &B );
fprintf( stdout, "\n" );
}
/*
// Print the MATLAB commands to plot the data
// Delete all existing figures
fprintf( stdout, "figure;\n" );
// Plot the performance of the reference implementation
fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
// Indicate that you want to add to the existing plot
fprintf( stdout, "hold on;\n" );
// Plot the data for the other numbers of threads
for ( i = 1; i <= n_variants; i++ ){
fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
i, i, colors[ i-1 ], ticks[ i-1 ] );
}
fprintf( stdout, "legend( ... \n" );
fprintf( stdout, "'Reference', ... \n" );
for ( i = 1; i <= n_variants; i++ )
fprintf( stdout, "'FLAME var%d', ... \n", i );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME chol\\_l performance (%s)' );\n",
m_dim_desc );
fprintf( stdout, "print -depsc chol_l_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例15: main
//.........這裏部分代碼省略.........
else if( m_input < -1 ) {
sprintf( m_dim_desc, "m = p/%d", -m_input );
sprintf( m_dim_tag, "m%dp", -m_input );
}
else if( m_input == -1 ) {
sprintf( m_dim_desc, "m = p" );
sprintf( m_dim_tag, "m%dp", 1 );
}
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
if( m < 0 ) m = p / abs(m_input);
for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, 1, 0, 0, &b );
FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
if ( pc_str[param_combo][0] == 'l' )
FLA_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A );
else
FLA_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A );
FLA_Copy_external( b, b_orig );
fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
fflush( stdout );
time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m,
A, b, b_orig, norm, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &b );
FLA_Obj_free( &b_orig );
FLA_Obj_free( &norm );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_chol\\_%s', 'fla\\_chol\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );\n", m_dim_desc );
fprintf( stdout, "print -depsc chol_front_%s.eps\n", m_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize();
return 0;
}