本文整理匯總了C++中FLA_Copy_external函數的典型用法代碼示例。如果您正苦於以下問題:C++ FLA_Copy_external函數的具體用法?C++ FLA_Copy_external怎麼用?C++ FLA_Copy_external使用的例子?那麼, 這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了FLA_Copy_external函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的C++代碼示例。
示例1: FLA_Chol_solve
/*
 * FLA_Chol_solve
 *
 * Copies B into X (unless the two objects are identical) and then applies
 * two left-side, non-unit-diagonal triangular solves with A to X, so the
 * result is left in X.
 *
 *   uplo - selects which triangle of A is used; any value other than
 *          FLA_LOWER_TRIANGULAR takes the upper-triangular path.
 *   A    - triangular operand of both solves.
 *          NOTE(review): presumably a Cholesky factor; confirm with caller.
 *   B    - right-hand side; only read when it is not identical to X.
 *   X    - receives the copy of B and is overwritten by the solves.
 *
 * Always returns FLA_SUCCESS; the return values of the callees are not
 * inspected.
 */
FLA_Error FLA_Chol_solve( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, FLA_Obj X )
{
    /* Validate arguments when at least minimal error checking is enabled. */
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Chol_solve_check( uplo, A, B, X );
    }

    /* Work in X; skip the copy when B and X are the same object. */
    if ( !FLA_Obj_is_identical( B, X ) )
    {
        FLA_Copy_external( B, X );
    }

    if ( uplo != FLA_LOWER_TRIANGULAR )
    {
        /* Upper path: conjugate-transposed solve first, then plain solve. */
        FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                           FLA_NONUNIT_DIAG, FLA_ONE, A, X );
        FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                           FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    }
    else
    {
        /* Lower path: plain solve first, then conjugate-transposed solve. */
        FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                           FLA_NONUNIT_DIAG, FLA_ONE, A, X );
        FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                           FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    }

    return FLA_SUCCESS;
}
示例2: FLA_QR_UT_solve
/*
 * FLA_QR_UT_solve
 *
 * Works on a temporary copy of B: applies Q (described by A and T) with
 * FLA_CONJ_TRANSPOSE from the left, performs an upper-triangular solve
 * with the top partition of A against the top partition of the copy, and
 * copies that top partition into X. B itself is not written to here.
 *
 *   A - operand passed to FLA_Apply_Q_UT; its top partition (split at
 *       FLA_Obj_width( A )) is the triangular operand of the solve.
 *       NOTE(review): presumably the output of a QR_UT factorization.
 *   T - companion operand for FLA_Apply_Q_UT and its workspace query.
 *   B - right-hand side.
 *   X - receives the solution.
 *
 * Always returns FLA_SUCCESS; callee return values are not inspected.
 */
FLA_Error FLA_QR_UT_solve( FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X )
{
    FLA_Obj work;          /* workspace for FLA_Apply_Q_UT        */
    FLA_Obj rhs;           /* temporary copy of B                 */
    FLA_Obj A_top, A_bot;  /* 2x1 partition of A                  */
    FLA_Obj rhs_top, rhs_bot;  /* matching 2x1 partition of rhs   */

    /* Validate arguments when at least minimal error checking is enabled. */
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_QR_UT_solve_check( A, T, B, X );
    }

    /* Allocate the workspace and a scratch copy of the right-hand side. */
    FLA_Apply_Q_UT_create_workspace( T, B, &work );
    FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &rhs );

    /* Apply Q with conjugate transposition to the scratch copy. */
    FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                    A, T, work, rhs );

    /* Split A and the scratch copy after the first width(A) rows. */
    FLA_Part_2x1( A,   &A_top,
                       &A_bot,   FLA_Obj_width( A ), FLA_TOP );
    FLA_Part_2x1( rhs, &rhs_top,
                       &rhs_bot, FLA_Obj_width( A ), FLA_TOP );

    /* Upper-triangular solve on the top partitions, then export to X. */
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, FLA_ONE, A_top, rhs_top );
    FLA_Copy_external( rhs_top, X );

    /* Release the temporaries. */
    FLA_Obj_free( &rhs );
    FLA_Obj_free( &work );

    return FLA_SUCCESS;
}
示例3: FLA_LU_piv_copy_task
/*
 * FLA_LU_piv_copy_task
 *
 * Runs FLA_LU_piv_task on A (with p and cntl), then copies A into U.
 * The copy is performed regardless of the value the task returned.
 *
 * Returns the value produced by FLA_LU_piv_task.
 */
FLA_Error FLA_LU_piv_copy_task( FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl )
{
    FLA_Error lu_status = FLA_LU_piv_task( A, p, cntl );

    /* Mirror the contents of A into U after the task has run. */
    FLA_Copy_external( A, U );

    return lu_status;
}
示例4: FLA_UDdate_UT_solve
/*
 * FLA_UDdate_UT_solve
 *
 * Copies bR into x and then overwrites x with the result of a left-side,
 * upper-triangular, non-transposed, non-unit-diagonal solve against R.
 * bR is not written to by this routine.
 *
 *   R  - triangular operand of the solve.
 *   bR - right-hand side.
 *   x  - receives the solution.
 *
 * Always returns FLA_SUCCESS; callee return values are not inspected.
 */
FLA_Error FLA_UDdate_UT_solve( FLA_Obj R, FLA_Obj bR, FLA_Obj x )
{
    /* Validate arguments when at least minimal error checking is enabled. */
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_UDdate_UT_solve_check( R, bR, x );
    }

    /* Stage the right-hand side in x so the solve can run in place there. */
    FLA_Copy_external( bR, x );

    /* In-place triangular solve: R is the coefficient matrix, x the rhs. */
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, R, x );

    return FLA_SUCCESS;
}
示例5: FLA_Copy
/*
 * FLA_Copy
 *
 * Front-end for copying A into B. The implementation is selected at
 * compile time:
 *   - with FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES defined, arguments are
 *     checked (when error checking is enabled) and the call is routed
 *     through FLA_Copy_internal with the fla_copy_cntl_blas control tree;
 *   - otherwise FLA_Copy_external is called directly.
 *
 * Returns whatever the selected callee returns.
 */
FLA_Error FLA_Copy( FLA_Obj A, FLA_Obj B )
{
#ifdef FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES
    /* Validate arguments when at least minimal error checking is enabled. */
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Copy_check( A, B );
    }

    /* Dispatch through the control-tree based internal routine. */
    return FLA_Copy_internal( A, B, fla_copy_cntl_blas );
#else
    /* No control trees for this front-end: call the external wrapper. */
    return FLA_Copy_external( A, B );
#endif
}
示例6: FLA_Trmvsx_external
/*
 * FLA_Trmvsx_external
 *
 * Using a temporary copy of x: applies FLA_Trmv_external (with uplo,
 * transa, diag and A) to the copy, scales y by beta, and then adds
 * alpha times the copy to y. x itself is not written to here.
 *
 * If A has a zero dimension the routine returns immediately, before y is
 * scaled by beta.
 * NOTE(review): confirm that leaving y unscaled in that case is intended.
 *
 * Always returns FLA_SUCCESS; callee return values are not inspected.
 */
FLA_Error FLA_Trmvsx_external( FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
    FLA_Obj x_work;  /* scratch vector conformal to x */

    /* Arguments are validated only under full error checking. */
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    {
        FLA_Trmvsx_check( uplo, transa, diag, alpha, A, x, beta, y );
    }

    /* Nothing to do for an empty A. */
    if ( FLA_Obj_has_zero_dim( A ) )
    {
        return FLA_SUCCESS;
    }

    /* Duplicate x so the triangular product does not overwrite it. */
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, x, &x_work );
    FLA_Copy_external( x, x_work );

    /* x_work := triangular product; y := beta * y + alpha * x_work. */
    FLA_Trmv_external( uplo, transa, diag, A, x_work );
    FLA_Scal_external( beta, y );
    FLA_Axpy_external( alpha, x_work, y );

    FLA_Obj_free( &x_work );

    return FLA_SUCCESS;
}
示例7: time_Sylv_nn
void time_Sylv_nn(
int variant, int type, int n_repeats, int m, int n, int nb_alg,
FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
fla_blocksize_t*
bp;
fla_sylv_t*
cntl_sylv_var;
fla_sylv_t*
cntl_sylv_unb;
fla_gemm_t*
cntl_gemm_blas;
/*
if( type == FLA_ALG_UNBLOCKED && n > 400 )
{
*gflops = 0.0;
*diff = 0.0;
return;
}
*/
bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
cntl_sylv_unb = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );
cntl_sylv_var = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp, cntl_sylv_unb, cntl_sylv_unb, cntl_sylv_unb, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < n_repeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( variant ){
case 0:
/* Time reference implementation */
REF_Sylv_nn( isgn, A, B, C, scale );
break;
case 1:{
/* Time variant 1 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
/* Time variant 2 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
/* Time variant 3 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
//.........這裏部分代碼省略.........
示例8: FLA_Copy_task
/*
 * FLA_Copy_task
 *
 * Task-level wrapper that forwards to FLA_Copy_external( A, B ) and
 * returns its result. The cntl argument is accepted but not used.
 */
FLA_Error FLA_Copy_task( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
    FLA_Error copy_status;

    (void) cntl;  /* unused by this wrapper */

    copy_status = FLA_Copy_external( A, B );
    return copy_status;
}
示例9: time_Gemm
void time_Gemm(
int param_combo, int type, int nrepeats, int m, int k, int n,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
if ( param_combo != 4 )
{
*gflops = 0.0;
*diff = 0.0;
return;
}
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( param_combo ){
// Time parameter combination 0
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 1
case 1:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 2
case 2:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 3
case 3:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 4
case 4:{
switch( type ){
//.........這裏部分代碼省略.........
示例10: libfla_test_qrut_experiment
void libfla_test_qrut_experiment( test_params_t params,
unsigned int var,
char* sc_str,
FLA_Datatype datatype,
unsigned int p_cur,
unsigned int pci,
unsigned int n_repeats,
signed int impl,
double* perf,
double* residual )
{
dim_t b_flash = params.b_flash;
dim_t b_alg_flat = params.b_alg_flat;
double time_min = 1e9;
double time;
unsigned int i;
unsigned int m, n;
unsigned int min_m_n;
signed int m_input = -2;
signed int n_input = -1;
FLA_Obj A, T, x, b, y, norm;
FLA_Obj A_save;
FLA_Obj A_test, T_test, x_test, b_test;
// Determine the dimensions.
if ( m_input < 0 ) m = p_cur * abs(m_input);
else m = p_cur;
if ( n_input < 0 ) n = p_cur * abs(n_input);
else n = p_cur;
// Compute the minimum dimension.
min_m_n = min( m, n );
// Create the matrices for the current operation.
libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
if ( impl == FLA_TEST_FLAT_FRONT_END ||
( impl == FLA_TEST_FLAT_BLK_VAR && var == 1 ) )
libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &T );
else if ( var == 2 )
libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &T );
else
libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], 1, min_m_n, &T );
// Initialize the test matrices.
FLA_Random_matrix( A );
// Save the original object contents in a temporary object.
FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
// Create vectors to form a linear system.
FLA_Obj_create( datatype, n, 1, 0, 0, &x );
FLA_Obj_create( datatype, m, 1, 0, 0, &b );
FLA_Obj_create( datatype, n, 1, 0, 0, &y );
// Create a real scalar object to hold the norm of A.
FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
// Create a random right-hand side vector.
FLA_Random_matrix( b );
// Use hierarchical matrices if we're testing the FLASH front-end.
if ( impl == FLA_TEST_HIER_FRONT_END )
{
FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test );
FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test );
FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test );
}
else
{
A_test = A;
T_test = T;
}
// Create a control tree for the individual variants.
if ( impl == FLA_TEST_FLAT_UNB_VAR ||
impl == FLA_TEST_FLAT_OPT_VAR ||
impl == FLA_TEST_FLAT_BLK_VAR )
libfla_test_qrut_cntl_create( var, b_alg_flat );
// Repeat the experiment n_repeats times and record results.
for ( i = 0; i < n_repeats; ++i )
{
if ( impl == FLA_TEST_HIER_FRONT_END )
FLASH_Obj_hierarchify( A_save, A_test );
else
FLA_Copy_external( A_save, A_test );
time = FLA_Clock();
libfla_test_qrut_impl( impl, A_test, T_test );
time = FLA_Clock() - time;
time_min = min( time_min, time );
}
// Perform a linear solve with the result.
if ( impl == FLA_TEST_HIER_FRONT_END )
{
FLASH_QR_UT_solve( A_test, T_test, b_test, x_test );
//.........這裏部分代碼省略.........
示例11: time_QR_UT
void time_QR_UT(
int variant, int type, int nrepeats, int m, int n,
FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
A_save, b_save, norm;
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save );
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
FLA_Copy_external( A, A_save );
FLA_Copy_external( b, b_save );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( A_save, A );
*dtime = FLA_Clock();
switch( variant ){
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_QR_UT( A, t );
break;
case FLA_ALG_FRONT:
FLA_QR_UT( A, T );
break;
default:
printf("trouble\n");
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
if ( type == FLA_ALG_REFERENCE )
{
FLA_Obj AT, AB;
FLA_Obj bT, bB;
FLA_Obj y;
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );
FLA_Copy_external( b, b_orig );
if ( FLA_Obj_is_real( A ) )
FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b );
else
FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b );
FLA_Part_2x1( A, &AT,
&AB, FLA_Obj_width( A ), FLA_TOP );
FLA_Part_2x1( b, &bT,
&bB, FLA_Obj_width( A ), FLA_TOP );
FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
FLA_NONUNIT_DIAG, FLA_ONE, AT, bT );
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );
FLA_Nrm2_external( y, norm );
FLA_Obj_extract_real_scalar( norm, diff );
FLA_Obj_free( &y );
}
else
{
FLA_Obj x, y;
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x );
FLA_Copy_external( b, b_orig );
FLA_QR_UT_solve( A, T, b, x );
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );
FLA_Nrm2_external( y, norm );
FLA_Obj_extract_real_scalar( norm, diff );
//.........這裏部分代碼省略.........
示例12: main
int main(int argc, char *argv[])
{
int
datatype,
m_input,
m,
p_first, p_last, p_inc,
p,
nb_alg,
variant,
n_repeats,
i, j,
n_variants = N_VARIANTS;
char *colors = "brkgmcbrkg";
char *ticks = "o+*xso+*xs";
char m_dim_desc[14];
char m_dim_tag[10];
double max_gflops=6.0;
double
dtime,
gflops,
diff;
FLA_Obj
A, b, b_orig, norm;
FLA_Init();
fprintf( stdout, "%c number of repeats:", '%' );
scanf( "%d", &n_repeats );
fprintf( stdout, "%c %d\n", '%', n_repeats );
fprintf( stdout, "%c Enter blocking size:", '%' );
scanf( "%d", &nb_alg );
fprintf( stdout, "%c %d\n", '%', nb_alg );
fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
scanf( "%d%d%d", &p_first, &p_last, &p_inc );
fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );
fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' );
scanf( "%d", &m_input );
fprintf( stdout, "%c %d\n", '%', m_input );
fprintf( stdout, "\nclear all;\n\n" );
if ( m_input > 0 ) {
sprintf( m_dim_desc, "m = %d", m_input );
sprintf( m_dim_tag, "m%dc", m_input);
}
else if( m_input < -1 ) {
sprintf( m_dim_desc, "m = p/%d", -m_input );
sprintf( m_dim_tag, "m%dp", -m_input );
}
else if( m_input == -1 ) {
sprintf( m_dim_desc, "m = p" );
sprintf( m_dim_tag, "m%dp", 1 );
}
//datatype = FLA_FLOAT;
//datatype = FLA_DOUBLE;
//datatype = FLA_COMPLEX;
datatype = FLA_DOUBLE_COMPLEX;
for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
{
m = m_input;
if( m < 0 ) m = p / f2c_abs(m_input);
FLA_Obj_create( datatype, m, m, 0, 0, &A );
FLA_Obj_create( datatype, m, 1, 0, 0, &b );
FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );
/*
FLA_Obj_create( datatype, m, m, m, 1, &A );
FLA_Obj_create( datatype, m, 1, 1, 1, &b );
FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );
*/
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
FLA_Random_matrix( b );
FLA_Copy_external( b, b_orig );
/*
time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,
A, b, b_orig, norm, &dtime, &diff, &gflops );
//.........這裏部分代碼省略.........
示例13: time_Her2k_ln
void time_Her2k_ln(
int variant, int type, int nrepeats, int n, int nb_alg,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
fla_blocksize_t*
bp;
fla_gemm_t*
cntl_gemm_blas;
fla_her2k_t*
cntl_her2k_blas;
fla_her2k_t*
cntl_her2k_var;
bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
cntl_her2k_blas = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );
cntl_her2k_var = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, bp, cntl_her2k_blas, cntl_gemm_blas, cntl_gemm_blas );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ )
{
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( variant ){
case 0:
// Time reference implementation
REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );
break;
case 1:{
// Time variant 1
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
// Time variant 2
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
// Time variant 3
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 4:{
// Time variant 4
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
break;
//.........這裏部分代碼省略.........
示例14: main
//.........這裏部分代碼省略.........
if ( pc_str[param_combo][0] == 'c' ||
pc_str[param_combo][1] == 'c' )
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_COMPLEX;
else
datatype = FLA_DOUBLE_COMPLEX;
}
else
{
if ( precision == FLA_SINGLE_PRECISION )
datatype = FLA_FLOAT;
else
datatype = FLA_DOUBLE;
}
// If transposing A, switch dimensions.
if ( pc_str[param_combo][0] == 'n' )
FLA_Obj_create( datatype, m, k, 0, 0, &A );
else
FLA_Obj_create( datatype, k, m, 0, 0, &A );
// If transposing B, switch dimensions.
if ( pc_str[param_combo][1] == 'n' )
FLA_Obj_create( datatype, k, n, 0, 0, &B );
else
FLA_Obj_create( datatype, n, k, 0, 0, &B );
FLA_Obj_create( datatype, m, n, 0, 0, &C );
FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( C );
FLA_Copy_external( C, C_ref );
fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n );
fflush( stdout );
time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
/*
time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n,
A, B, C, C_ref, &dtime, &diff, &gflops );
fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
fflush( stdout );
*/
fprintf( stdout, " ]; \n" );
fflush( stdout );
FLA_Obj_free( &A );
FLA_Obj_free( &B );
FLA_Obj_free( &C );
FLA_Obj_free( &C_ref );
}
fprintf( stdout, "\n" );
}
/*
fprintf( stdout, "figure;\n" );
fprintf( stdout, "hold on;\n" );
for ( i = 0; i < n_param_combos; i++ ) {
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); \n",
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
}
fprintf( stdout, "legend( ... \n" );
for ( i = 0; i < n_param_combos; i++ )
fprintf( stdout, "'ref\\_gemm\\_%s', 'fla\\_gemm\\_%s', ... \n", pc_str[i], pc_str[i] );
fprintf( stdout, "'Location', 'SouthEast' ); \n" );
fprintf( stdout, "xlabel( 'problem size p' );\n" );
fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );\n",
m_dim_desc, k_dim_desc, n_dim_desc );
fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
fprintf( stdout, "hold off;\n");
fflush( stdout );
*/
FLA_Finalize( );
return 0;
}
示例15: time_Apply_G_rf
void time_Apply_G_rf(
int variant, int type, int n_repeats, int m, int k, int n, int b_alg,
FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P,
double *dtime, double *diff, double *gflops )
{
int irep;
double
dtime_old = 1.0e9;
FLA_Obj
A_save, G_save, norm;
if ( FLA_Obj_is_real( A ) )
{
if (
//( variant == 1 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 1 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 1 && type == FLA_ALG_BLOCKED ) ||
//( variant == 2 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 2 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 2 && type == FLA_ALG_BLOCKED ) ||
//( variant == 3 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 3 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 3 && type == FLA_ALG_BLOCKED ) ||
//( variant == 6 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 6 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 6 && type == FLA_ALG_BLOCKED ) ||
//( variant == 9 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 9 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 9 && type == FLA_ALG_BLOCKED ) ||
( variant == 4 ) ||
( variant == 5 ) ||
( variant == 7 ) ||
( variant == 8 ) ||
FALSE
)
{
*gflops = 0.0;
*diff = 0.0;
return;
}
}
else if ( FLA_Obj_is_complex( A ) )
{
if (
//( variant == 1 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 1 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 1 && type == FLA_ALG_BLOCKED ) ||
//( variant == 2 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 2 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 2 && type == FLA_ALG_BLOCKED ) ||
//( variant == 3 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 3 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 3 && type == FLA_ALG_BLOCKED ) ||
//( variant == 6 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 6 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 6 && type == FLA_ALG_BLOCKED ) ||
//( variant == 9 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 9 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 9 && type == FLA_ALG_BLOCKED ) ||
( variant == 4 ) ||
( variant == 5 ) ||
( variant == 7 ) ||
( variant == 8 ) ||
FALSE
)
{
*gflops = 0.0;
*diff = 0.0;
return;
}
}
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );
FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
//dim_t b_flash_m = b_alg;
//dim_t b_flash_n = n;
//FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH );
//printf ( "flash dims: %d x %d\n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) );
FLA_Copy_external( A, A_save );
FLA_Copy_external( G, G_save );
for ( irep = 0 ; irep < n_repeats; irep++ ){
FLA_Copy_external( A_save, A );
FLA_Copy_external( G_save, G );
//FLASH_Obj_hierarchify( A_save, AH );
*dtime = FLA_Clock();
switch( variant ){
//.........這裏部分代碼省略.........