本文整理匯總了C++中FLA_Clock函數的典型用法代碼示例。如果您正苦於以下問題：C++ FLA_Clock函數的具體用法？C++ FLA_Clock怎麼用？C++ FLA_Clock使用的例子？那麼，這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了FLA_Clock函數的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的C++代碼示例。
示例1: mexFunction
/*
 * Entry point with the MATLAB MEX signature that forwards its inputs to
 * FLA_Axpyt_external().
 *
 * nlhs, plhs : not referenced in this function.
 * nrhs, prhs : incoming arguments.  NRHS of them are converted into the
 *              integer attributes (attr) and FLAME objects (obj) handed to
 *              the FLAME call.  When exactly one additional argument is
 *              present (nrhs == NRHS+1), it is treated as a double buffer
 *              that receives the elapsed time of the FLAME call, measured
 *              with FLA_Clock().
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    FLA_Obj obj[NOBJ];
    int attr[NINT];
    double *dtime;
    /* A single extra trailing argument switches timing on. */
    const int want_timing = (nrhs == NRHS+1);

    FLA_Init();

    /* Validate the argument count before touching prhs. */
    FLA_M2C_CheckNumArgs(NRHS, nrhs);

    /* Translate the Matlab arguments into their FLAME C counterparts. */
    FLA_M2C_ConvertArgs(NRHS, prhs, NINT, attr, obj);

    if (want_timing) {
        /* Locate the output slot for the timing, then start the clock. */
        dtime = FLA_M2C_ConvertDoublePtr(prhs[NRHS]);
        *dtime = FLA_Clock();
    }

    /* The operation being wrapped (and, optionally, timed). */
    FLA_Axpyt_external(attr[0], obj[0], obj[1], obj[2]);

    if (want_timing) {
        /* Replace the stored start time with the elapsed time. */
        *dtime = FLA_Clock() - *dtime;
    }

    FLA_Finalize();
}
示例2: main
int main(int argc, char *argv[])
{
int m, n, k, nfirst, nlast, ninc, i, irep,
nrepeats, nb_alg, check;;
double
dtime,
dtime_best,
gflops,
max_gflops,
diff,
d_n;
FLA_Obj
A, B, C, Cref, Cold;
/* Initialize FLAME */
FLA_Init( );
/* Every time trial is repeated "repeat" times */
printf( "%% number of repeats:" );
scanf( "%d", &nrepeats );
printf( "%% %d\n", nrepeats );
/* Enter the max GFLOPS attainable */
printf( "%% enter max GFLOPS:" );
scanf( "%lf", &max_gflops );
printf( "%% %lf\n", max_gflops );
/* Enter the algorithmic block size */
printf( "%% enter nb_alg:" );
scanf( "%d", &nb_alg );
printf( "%% %d\n", nb_alg );
/* Timing trials for matrix sizes n=nfirst to nlast in increments
of ninc will be performed */
printf( "%% enter nfirst, nlast, ninc:" );
scanf( "%d%d%d", &nfirst, &nlast, &ninc );
printf( "%% %d %d %d\n", nfirst, nlast, ninc );
i = 1;
for ( n=nfirst; n<= nlast; n+=ninc ){
/* Allocate space for the matrices */
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &B );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &C );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cref );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cold );
/* Generate random matrices L and B */
FLA_Random_matrix( A );
FLA_Random_matrix( B );
FLA_Random_matrix( Cold );
gflops = 2.0 * n * n * n * 1.0e-09;
/* Time FLA_Symm */
for ( irep=0; irep<nrepeats; irep++ ){
FLA_Copy( Cold, Cref );
dtime = FLA_Clock();
FLA_Symm( FLA_LEFT, FLA_LOWER_TRIANGULAR,
FLA_ONE, A, B, FLA_ONE, Cref );
dtime = FLA_Clock() - dtime;
if ( irep == 0 )
dtime_best = dtime;
else
dtime_best = ( dtime < dtime_best ? dtime : dtime_best );
}
printf( "data_FLAME( %d, 1:2 ) = [ %d %le ];\n", i, n,
gflops / dtime_best );
fflush( stdout );
/* Time the your implementations */
#if TEST_UNB_VAR1==TRUE
/* Variant 1 unblocked */
for ( irep=0; irep<nrepeats; irep++ ){
FLA_Copy( Cold, C );
dtime = FLA_Clock();
Symm_unb_var1( A, B, C );
dtime = FLA_Clock() - dtime;
if ( irep == 0 )
dtime_best = dtime;
else
//.........這裏部分代碼省略.........
示例3: time_Apply_Q
/*
 * Times one Apply_Q implementation over nrepeats runs and reports the best
 * (smallest) elapsed time, a difference value, and a GFLOPS figure.
 *
 * param_combo : only the value 0 triggers any work inside the timed region.
 * type        : FLA_ALG_REFERENCE runs REF_Apply_Q on flattened copies of
 *               A and B; FLA_ALG_FRONT runs FLASH_Apply_Q_UT on A, T, W, B;
 *               anything else prints "trouble".
 * nrepeats    : number of timed repetitions.
 * m, n        : not referenced in this function.
 * B_ref       : written (from the flat result) in the reference case and
 *               compared against B otherwise.
 * dtime       : out, smallest elapsed time observed (1.0e9 if no run was
 *               faster than that or nrepeats is 0).
 * diff        : out, 0.0 in the reference case, otherwise
 *               FLASH_Max_elemwise_diff( B, B_ref ) after a triangular solve.
 * gflops      : out, 2 * length(A) * width(A) * width(B) / best time / 1e9,
 *               multiplied by 4 when A is complex.
 *
 * B is restored to its contents on entry before the function returns.
 */
void time_Apply_Q(
    int param_combo, int type, int nrepeats, int m, int n,
    FLA_Obj A, FLA_Obj B, FLA_Obj B_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W,
    double *dtime, double *diff, double *gflops )
{
    FLA_Obj B_save, A_flat, B_flat;
    double  best_time = 1.0e9;
    int     rep;

    /* Scratch objects: a backup of B plus flat counterparts of A and B. */
    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_save );
    FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
    FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );

    FLASH_Copy( B, B_save );

    for ( rep = 0; rep < nrepeats; ++rep )
    {
        /* Every repetition starts from the same B and fresh flat copies. */
        FLASH_Copy( B_save, B );
        FLASH_Obj_flatten( A, A_flat );
        FLASH_Obj_flatten( B, B_flat );

        *dtime = FLA_Clock();

        /* Parameter combination 0 is the only one handled here. */
        if ( param_combo == 0 )
        {
            switch ( type )
            {
            case FLA_ALG_REFERENCE:
                REF_Apply_Q( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE,
                             A_flat, t, B_flat );
                break;
            case FLA_ALG_FRONT:
                FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD,
                                  FLA_COLUMNWISE, A, T, W, B );
                break;
            default:
                printf("trouble\n");
            }
        }

        *dtime = FLA_Clock() - *dtime;

        /* Keep the fastest run seen so far. */
        if ( *dtime < best_time )
            best_time = *dtime;
    }

    if ( type != FLA_ALG_REFERENCE )
    {
        /* Solve on the hierarchical objects and compare with the reference. */
        FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                    FLA_NONUNIT_DIAG, FLA_ONE, A, B );
        *diff = FLASH_Max_elemwise_diff( B, B_ref );
    }
    else
    {
        /* Solve on the flat objects and store the result as the reference. */
        FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                           FLA_NONUNIT_DIAG, FLA_ONE, A_flat, B_flat );
        FLASH_Obj_hierarchify( B_flat, B_ref );
        *diff = 0.0;
    }

    /* Same left-to-right evaluation order as before, so the value is unchanged. */
    *gflops = 2.0 *
              FLASH_Obj_scalar_length( A ) *
              FLASH_Obj_scalar_width( A ) *
              FLASH_Obj_scalar_width( B ) /
              best_time /
              1.0e9;

    if ( FLA_Obj_is_complex( A ) )
        *gflops *= 4.0;

    *dtime = best_time;

    /* Restore the caller's B, then release the scratch objects. */
    FLASH_Copy( B_save, B );

    FLASH_Obj_free( &B_save );
    FLASH_Obj_free( &A_flat );
    FLASH_Obj_free( &B_flat );
}
示例4: time_Her2k_ln
void time_Her2k_ln(
int variant, int type, int nrepeats, int n, int nb_alg,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
fla_blocksize_t*
bp;
fla_gemm_t*
cntl_gemm_blas;
fla_her2k_t*
cntl_her2k_blas;
fla_her2k_t*
cntl_her2k_var;
bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
cntl_her2k_blas = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );
cntl_her2k_var = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, bp, cntl_her2k_blas, cntl_gemm_blas, cntl_gemm_blas );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ )
{
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( variant ){
case 0:
// Time reference implementation
REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );
break;
case 1:{
// Time variant 1
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
// Time variant 2
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
// Time variant 3
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );
break;
default:
printf("trouble\n");
}
break;
}
case 4:{
// Time variant 4
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
break;
//.........這裏部分代碼省略.........
示例5: libfla_test_symm_experiment
//.........這裏部分代碼省略.........
// Save the original object contents in a temporary object.
FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, C, &C_save );
// Use hierarchical matrices if we're testing the FLASH front-end.
if ( impl == FLA_TEST_HIER_FRONT_END )
{
FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test );
FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );
FLASH_Obj_create_hier_copy_of_flat( C, 1, &b_flash, &C_test );
}
else
{
A_test = A;
B_test = B;
C_test = C;
}
// Create a control tree for the individual variants.
if ( impl == FLA_TEST_FLAT_UNB_VAR ||
impl == FLA_TEST_FLAT_OPT_VAR ||
impl == FLA_TEST_FLAT_BLK_VAR ||
impl == FLA_TEST_FLAT_UNB_EXT ||
impl == FLA_TEST_FLAT_BLK_EXT )
libfla_test_symm_cntl_create( var, b_alg_flat );
// Repeat the experiment n_repeats times and record results.
for ( i = 0; i < n_repeats; ++i )
{
if ( impl == FLA_TEST_HIER_FRONT_END )
FLASH_Obj_hierarchify( C_save, C_test );
else
FLA_Copy_external( C_save, C_test );
time = FLA_Clock();
libfla_test_symm_impl( impl, side, uplo, alpha, A_test, B_test, beta, C_test );
time = FLA_Clock() - time;
time_min = min( time_min, time );
}
// Copy the solution to flat matrix X.
if ( impl == FLA_TEST_HIER_FRONT_END )
{
FLASH_Obj_flatten( C_test, C );
}
else
{
// No action needed since C_test and C refer to the same object.
}
// Free the hierarchical matrices if we're testing the FLASH front-end.
if ( impl == FLA_TEST_HIER_FRONT_END )
{
FLASH_Obj_free( &A_test );
FLASH_Obj_free( &B_test );
FLASH_Obj_free( &C_test );
}
// Free the control trees if we're testing the variants.
if ( impl == FLA_TEST_FLAT_UNB_VAR ||
impl == FLA_TEST_FLAT_OPT_VAR ||
impl == FLA_TEST_FLAT_BLK_VAR ||
impl == FLA_TEST_FLAT_UNB_EXT ||
impl == FLA_TEST_FLAT_BLK_EXT )
libfla_test_symm_cntl_free();
示例6: time_Sylv_nn
void time_Sylv_nn(
int variant, int type, int n_repeats, int m, int n, int nb_alg,
FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
fla_blocksize_t*
bp;
fla_sylv_t*
cntl_sylv_var;
fla_sylv_t*
cntl_sylv_unb;
fla_gemm_t*
cntl_gemm_blas;
/*
if( type == FLA_ALG_UNBLOCKED && n > 400 )
{
*gflops = 0.0;
*diff = 0.0;
return;
}
*/
bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
cntl_sylv_unb = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );
cntl_sylv_var = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp, cntl_sylv_unb, cntl_sylv_unb, cntl_sylv_unb, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < n_repeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( variant ){
case 0:
/* Time reference implementation */
REF_Sylv_nn( isgn, A, B, C, scale );
break;
case 1:{
/* Time variant 1 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
/* Time variant 2 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
/* Time variant 3 */
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale );
break;
case FLA_ALG_BLOCKED:
FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var );
break;
default:
printf("trouble\n");
//.........這裏部分代碼省略.........
示例7: time_QR_UT
void time_QR_UT(
int variant, int type, int nrepeats, int m, int n,
FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
A_save, b_save, norm;
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save );
if ( FLA_Obj_is_single_precision( A ) )
FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );
else
FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );
FLA_Copy_external( A, A_save );
FLA_Copy_external( b, b_save );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( A_save, A );
*dtime = FLA_Clock();
switch( variant ){
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_QR_UT( A, t );
break;
case FLA_ALG_FRONT:
FLA_QR_UT( A, T );
break;
default:
printf("trouble\n");
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
if ( type == FLA_ALG_REFERENCE )
{
FLA_Obj AT, AB;
FLA_Obj bT, bB;
FLA_Obj y;
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );
FLA_Copy_external( b, b_orig );
if ( FLA_Obj_is_real( A ) )
FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b );
else
FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b );
FLA_Part_2x1( A, &AT,
&AB, FLA_Obj_width( A ), FLA_TOP );
FLA_Part_2x1( b, &bT,
&bB, FLA_Obj_width( A ), FLA_TOP );
FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
FLA_NONUNIT_DIAG, FLA_ONE, AT, bT );
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );
FLA_Nrm2_external( y, norm );
FLA_Obj_extract_real_scalar( norm, diff );
FLA_Obj_free( &y );
}
else
{
FLA_Obj x, y;
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );
FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x );
FLA_Copy_external( b, b_orig );
FLA_QR_UT_solve( A, T, b, x );
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );
FLA_Nrm2_external( y, norm );
FLA_Obj_extract_real_scalar( norm, diff );
//.........這裏部分代碼省略.........
示例8: time_Gemm_hh
void time_Gemm_hh(
int variant, int type, int nrepeats, int n, int nb_alg,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
fla_blocksize_t*
bp;
fla_gemm_t*
cntl_gemm_blas;
fla_gemm_t*
cntl_gemm_var;
bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
cntl_gemm_var = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( variant ){
// Time reference implementation
case 0:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_ONE, A, B, FLA_ONE, C );
break;
// Time variant 1
case 1:{
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
break;
default:
printf("trouble\n");
}
break;
}
// Time variant 2
case 2:{
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
break;
default:
printf("trouble\n");
}
break;
}
// Time variant 3
case 3:{
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
break;
default:
printf("trouble\n");
}
break;
}
// Time variant 4
case 4:{
switch( type ){
case FLA_ALG_UNBLOCKED:
FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
break;
case FLA_ALG_BLOCKED:
FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
break;
default:
//.........這裏部分代碼省略.........
示例9: time_Gemm
void time_Gemm(
int param_combo, int type, int nrepeats, int m, int k, int n,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
if ( param_combo != 4 )
{
*gflops = 0.0;
*diff = 0.0;
return;
}
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( param_combo ){
// Time parameter combination 0
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 1
case 1:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 2
case 2:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 3
case 3:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
case FLA_ALG_FRONT:
FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 4
case 4:{
switch( type ){
//.........這裏部分代碼省略.........
示例10: time_Tevd_v
void time_Tevd_v(
int variant, int type, int n_repeats, int m, int k_accum, int b_alg, int n_iter_max,
FLA_Obj A_orig, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj A, FLA_Obj l,
double *dtime, double *diff1, double* diff2, double *gflops )
{
int irep;
double
k, dtime_old = 1.0e9;
FLA_Obj
A_save, G_save, d_save, e_save;
if (
//( variant == 0 ) ||
//( variant == 1 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 2 && type == FLA_ALG_UNB_OPT ) ||
FALSE
)
{
*dtime = 0.0;
*gflops = 0.0;
*diff1 = 0.0;
*diff2 = 0.0;
return;
}
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, d, &d_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, e, &e_save );
FLA_Copy_external( A, A_save );
FLA_Copy_external( G, G_save );
FLA_Copy_external( d, d_save );
FLA_Copy_external( e, e_save );
for ( irep = 0 ; irep < n_repeats; irep++ ){
FLA_Copy_external( A_save, A );
FLA_Copy_external( G_save, G );
FLA_Copy_external( d_save, d );
FLA_Copy_external( e_save, e );
*dtime = FLA_Clock();
switch( variant ){
case 0:
REF_Tevd_v( d, e, A );
break;
// Time variant 1
case 1:
{
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Tevd_v_opt_var1( n_iter_max, d, e, G, A, b_alg );
break;
}
break;
}
// Time variant 2
case 2:
{
switch( type ){
case FLA_ALG_UNB_OPT:
FLA_Tevd_v_opt_var2( n_iter_max, d, e, G, R, W, A, b_alg );
break;
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
{
FLA_Obj V, A_rev_evd, norm, eye;
FLA_Copy( d, l );
//FLA_Obj_show( "A_save", A_save, "%9.2e + %9.2e ", "" );
//FLA_Obj_show( "A_evd", A, "%9.2e + %9.2e ", "" );
FLA_Sort_evd( FLA_FORWARD, l, A );
FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &V );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_rev_evd );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye );
FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, A );
FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_ONE, A, V, FLA_ZERO, A_rev_evd );
FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );
//.........這裏部分代碼省略.........
示例11: time_Sylv
void time_Sylv(
int param_combo, int type, int nrepeats, int m, int n,
FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old;
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLA_Copy_external( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ ){
FLA_Copy_external( C_old, C );
*dtime = FLA_Clock();
switch( param_combo ){
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );
break;
case FLA_ALG_FRONT:
FLA_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );
break;
default:
printf("trouble\n");
}
break;
}
case 1:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );
break;
case FLA_ALG_FRONT:
FLA_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );
break;
default:
printf("trouble\n");
}
break;
}
case 2:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );
break;
case FLA_ALG_FRONT:
FLA_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );
break;
case FLA_ALG_FRONT:
FLA_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );
break;
default:
printf("trouble\n");
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
if ( type == FLA_ALG_REFERENCE ){
FLA_Copy_external( C, C_ref );
*diff = 0.0;
}
else{
*diff = FLA_Max_elemwise_diff( C, C_ref );
}
//.........這裏部分代碼省略.........
示例12: time_Gemm_nn
//.........這裏部分代碼省略.........
break;
default:
printf("trouble\n");
}
break;
}
case 15:{
// Time variant 1->5
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 31:{
// Time variant 3->1
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 35:{
// Time variant 3->5
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 51:{
// Time variant 5->1
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
case 53:{
// Time variant 5->3
switch( type ){
case FLA_ALG_OPENMP_CVAR:
FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg );
break;
default:
printf("trouble\n");
}
break;
}
}
if ( irep == 0 )
dtime_old = FLA_Clock() - *dtime;
else{
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
}
if ( variant == 0 ){
FLA_Copy_external( C, Cref );
*diff = 0.0;
}
else{
*diff = FLA_Max_elemwise_diff( C, Cref );
//FLA_Obj_show( "C:", C, "%f", "\n");
}
*gflops = 2.0 *
FLA_Obj_length( C ) *
FLA_Obj_width( C ) *
FLA_Obj_width( A ) /
dtime_old /
1e9;
*dtime = dtime_old;
FLA_Copy_external( Cold, C );
FLA_Obj_free( &Cold );
}
示例13: time_Syrk_ln
//.........這裏部分代碼省略.........
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var2( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var2( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var2( A, C );
break;
case FLA_ALG_OPENMP_2LOOPSPLUS:
FLA_Syrk_ln_omp2x_var2( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 3:{
// Time variant 3
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var3( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var3( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var3( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 4:{
// Time variant 4
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var4( A, C );
break;
case FLA_ALG_OPENMP_2TASKS:
FLA_Syrk_ln_omp2t_var4( A, C );
break;
case FLA_ALG_OPENMP_2LOOPS:
FLA_Syrk_ln_omp2l_var4( A, C );
break;
default:
printf("trouble\n");
}
break;
}
case 5:{
// Time variant 5
switch( type ){
case FLA_ALG_OPENMP_1TASK:
FLA_Syrk_ln_omp1t_var5( A, C );
break;
default:
printf("trouble\n");
}
break;
}
}
if ( irep == 0 )
dtime_old = FLA_Clock() - *dtime;
else{
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
}
if ( variant == 0 ){
FLA_Copy_external( C, C_ref );
*diff = 0.0;
}
else{
*diff = FLA_Max_elemwise_diff( C, C_ref );
//FLA_Obj_show( "C:", C, "%f", "\n");
}
*gflops = 1.0 *
FLA_Obj_length( A ) *
FLA_Obj_length( A ) *
FLA_Obj_width( A ) /
dtime_old /
1e9;
*dtime = dtime_old;
FLA_Copy_external( C_old, C );
FLA_Obj_free( &C_old );
}
示例14: time_Syrk
void time_Syrk(
int param_combo, int type, int nrepeats, int m, int k,
FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
double *dtime, double *diff, double *gflops )
{
int
irep;
double
dtime_old = 1.0e9;
FLA_Obj
C_old, A_flat, C_flat;
FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );
FLASH_Copy( C, C_old );
for ( irep = 0 ; irep < nrepeats; irep++ )
{
FLASH_Copy( C_old, C );
FLASH_Obj_flatten( A, A_flat );
FLASH_Obj_flatten( C, C_flat );
*dtime = FLA_Clock();
switch( param_combo ){
// Time parameter combination 0
case 0:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );
break;
case FLA_ALG_FRONT:
FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 1
case 1:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );
break;
case FLA_ALG_FRONT:
FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 2
case 2:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );
break;
case FLA_ALG_FRONT:
FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
// Time parameter combination 3
case 3:{
switch( type ){
case FLA_ALG_REFERENCE:
REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );
break;
case FLA_ALG_FRONT:
FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );
break;
default:
printf("trouble\n");
}
break;
}
}
*dtime = FLA_Clock() - *dtime;
dtime_old = min( *dtime, dtime_old );
}
//.........這裏部分代碼省略.........
示例15: time_Apply_G_rf
void time_Apply_G_rf(
int variant, int type, int n_repeats, int m, int k, int n, int b_alg,
FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P,
double *dtime, double *diff, double *gflops )
{
int irep;
double
dtime_old = 1.0e9;
FLA_Obj
A_save, G_save, norm;
if ( FLA_Obj_is_real( A ) )
{
if (
//( variant == 1 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 1 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 1 && type == FLA_ALG_BLOCKED ) ||
//( variant == 2 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 2 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 2 && type == FLA_ALG_BLOCKED ) ||
//( variant == 3 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 3 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 3 && type == FLA_ALG_BLOCKED ) ||
//( variant == 6 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 6 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 6 && type == FLA_ALG_BLOCKED ) ||
//( variant == 9 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 9 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 9 && type == FLA_ALG_BLOCKED ) ||
( variant == 4 ) ||
( variant == 5 ) ||
( variant == 7 ) ||
( variant == 8 ) ||
FALSE
)
{
*gflops = 0.0;
*diff = 0.0;
return;
}
}
else if ( FLA_Obj_is_complex( A ) )
{
if (
//( variant == 1 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 1 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 1 && type == FLA_ALG_BLOCKED ) ||
//( variant == 2 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 2 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 2 && type == FLA_ALG_BLOCKED ) ||
//( variant == 3 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 3 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 3 && type == FLA_ALG_BLOCKED ) ||
//( variant == 6 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 6 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 6 && type == FLA_ALG_BLOCKED ) ||
//( variant == 9 && type == FLA_ALG_UNB_OPT ) ||
//( variant == 9 && type == FLA_ALG_UNB_ASM ) ||
//( variant == 9 && type == FLA_ALG_BLOCKED ) ||
( variant == 4 ) ||
( variant == 5 ) ||
( variant == 7 ) ||
( variant == 8 ) ||
FALSE
)
{
*gflops = 0.0;
*diff = 0.0;
return;
}
}
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );
FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
//dim_t b_flash_m = b_alg;
//dim_t b_flash_n = n;
//FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH );
//printf ( "flash dims: %d x %d\n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) );
FLA_Copy_external( A, A_save );
FLA_Copy_external( G, G_save );
for ( irep = 0 ; irep < n_repeats; irep++ ){
FLA_Copy_external( A_save, A );
FLA_Copy_external( G_save, G );
//FLASH_Obj_hierarchify( A_save, AH );
*dtime = FLA_Clock();
switch( variant ){
//.........這裏部分代碼省略.........