當前位置: 首頁>>代碼示例>>C++>>正文


C++ FLA_Repart_2x1_to_3x1函數代碼示例

本文整理匯總了C++中FLA_Repart_2x1_to_3x1函數的典型用法代碼示例。如果您正苦於以下問題:C++ FLA_Repart_2x1_to_3x1函數的具體用法?C++ FLA_Repart_2x1_to_3x1怎麽用?C++ FLA_Repart_2x1_to_3x1使用的例子?那麽, 這裏精選的函數代碼示例或許可以為您提供幫助。


在下文中一共展示了FLA_Repart_2x1_to_3x1函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的C++代碼示例。

示例1: FLA_QR_UT_piv_blk_var2

FLA_Error FLA_QR_UT_piv_blk_var2( FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02, 
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj TL,    TR,       T0,  T1,  W12;
  FLA_Obj TT,    TB;

  FLA_Obj pT,              p0,
          pB,              p1,
                           p2;

  FLA_Obj wT,              w0,
          wB,              w1,
                           w2;

  dim_t   b_alg, b;

  // Query the algorithmic blocksize by inspecting the length of T.
  b_alg = FLA_Obj_length( T );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  FLA_Part_2x1( p,    &pT, 
                      &pB,            0, FLA_TOP );

  FLA_Part_2x1( w,    &wT, 
                      &wB,            0, FLA_TOP );

  while ( FLA_Obj_min_dim( ABR ) > 0 ){

    b = min( b_alg, FLA_Obj_min_dim( ABR ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,
                           b, FLA_RIGHT );

    FLA_Repart_2x1_to_3x1( pT,                &p0, 
                        /* ** */            /* ** */
                                              &p1, 
                           pB,                &p2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( wT,                &w0, 
                        /* ** */            /* ** */
                                              &w1, 
                           wB,                &w2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // ** Reshape T matrices to match the blocksize b
    FLA_Part_2x1( TR,   &TT, 
                        &TB,    b, FLA_TOP );

    // ** Perform a unblocked (BLAS2-oriented) QR factorization 
    // with pivoting via the UT transform on ABR:
    //
    //   ABR  -> QB1 R11
    //
    // where:
    //  - QB1 is formed from UB1 (which is stored column-wise below the
    //    diagonal of ( A11 A21 )^T and the upper-triangle of T1. 
    //  - R11 is stored to ( A11 A12 ).
    //  - W12 stores  T and partial updates for FLA_Apply_Q_UT_piv_var.
    FLA_QR_UT_piv_internal( ABR, TT, wB, p1, 
                            FLA_Cntl_sub_qrut( cntl ) );

    if ( FLA_Obj_width( A12 ) > 0 )
    {
      // ** Block update
      FLA_Part_2x1( W12,  &TT, 
                          &TB,    b, FLA_TOP );
 
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
                         FLA_MINUS_ONE, A21, TT, FLA_ONE, A22 );
    }

    // ** Apply pivots to previous columns.
    FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,
                              FLA_LEFT );

    FLA_Cont_with_3x1_to_2x1( &pT,                p0, 
//.........這裏部分代碼省略.........
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:101,代碼來源:FLA_QR_UT_piv_blk_var2.c

示例2: FLASH_SA_LU

FLA_Error FLASH_SA_LU( FLA_Obj B, FLA_Obj C, 
                       FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t* cntl )
{
   FLA_Obj DT,              D0,
           DB,              D1,
                            D2;

   FLA_Obj ET,              E0,
           EB,              E1,
                            E2;

   FLA_Obj pT,              p0,
           pB,              p1,
                            p2;

   FLA_Obj LT,              L0,
           LB,              L1,
                            L2;

   FLA_Part_2x1( D,    &DT,
                       &DB,            0, FLA_TOP );

   FLA_Part_2x1( E,    &ET,
                       &EB,            0, FLA_TOP );

   FLA_Part_2x1( p,    &pT,
                       &pB,            0, FLA_TOP );

   FLA_Part_2x1( L,    &LT,
                       &LB,            0, FLA_TOP );

   while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
   {
      FLA_Repart_2x1_to_3x1( DT,                &D0,
                          /* ** */            /* ** */
                                                &D1,
                             DB,                &D2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( ET,                &E0,
                          /* ** */            /* ** */
                                                &E1,
                             EB,                &E2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( pT,                &p0,
                          /* ** */            /* ** */
                                                &p1,
                             pB,                &p2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( LT,                &L0,
                          /* ** */            /* ** */
                                                &L1,
                             LB,                &L2,        1, FLA_BOTTOM );

      /*------------------------------------------------------------*/

      if ( FLASH_Queue_get_enabled( ) )
      {
         // Enqueue
         ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ),
                              *FLASH_OBJ_PTR_AT( D1 ),
                              *FLASH_OBJ_PTR_AT( p1 ),
                              *FLASH_OBJ_PTR_AT( L1 ),
                              nb_alg,
                              FLA_Cntl_sub_lu( cntl ) );
      }
      else
      {
         // Execute leaf
         FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ),
                         *FLASH_OBJ_PTR_AT( D1 ),
                         *FLASH_OBJ_PTR_AT( p1 ),
                         *FLASH_OBJ_PTR_AT( L1 ),
                         nb_alg,
                         FLA_Cntl_sub_lu( cntl ) );
      }
      
      FLASH_SA_FS( L1,
                   D1, p1, C,
                           E1, nb_alg, FLA_Cntl_sub_gemm1( cntl ) );

      /*------------------------------------------------------------*/

      FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                                    D1,
                              /* ** */           /* ** */
                                &DB,                D2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &ET,                E0,
                                                    E1,
                              /* ** */           /* ** */
                                &EB,                E2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &pT,                p0,
                                                    p1,
                              /* ** */           /* ** */
                                &pB,                p2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &LT,                L0,
                                                    L1,
                              /* ** */           /* ** */
//.........這裏部分代碼省略.........
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:101,代碼來源:FLASH_SA_LU.c

示例3: FLA_Her2k_ln_unb_var6

FLA_Error FLA_Her2k_ln_unb_var6( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AT,              A0,
          AB,              a1t,
                           A2;

  FLA_Obj BT,              B0,
          BB,              b1t,
                           B2;

  FLA_Obj CTL,   CTR,      C00,  c01,     C02, 
          CBL,   CBR,      c10t, gamma11, c12t,
                           C20,  c21,     C22;


  FLA_Scalr_external( FLA_LOWER_TRIANGULAR, beta, C );

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_BOTTOM );

  FLA_Part_2x1( B,    &BT, 
                      &BB,            0, FLA_BOTTOM );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){


    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                                              &a1t, 
                        /* ** */            /* ** */
                           AB,                &A2,        1, FLA_TOP );

    FLA_Repart_2x1_to_3x1( BT,                &B0, 
                                              &b1t, 
                        /* ** */            /* ** */
                           BB,                &B2,        1, FLA_TOP );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00,  &c01,     /**/ &C02,
                                                &c10t, &gamma11, /**/ &c12t,
                        /* ************* */   /* ************************** */
                           CBL, /**/ CBR,       &C20,  &c21,     /**/ &C22,
                           1, 1, FLA_TL );

    /*------------------------------------------------------------*/

    /* c10t = c10t + a1t * B0' */
    FLA_Gemv_external( FLA_CONJ_NO_TRANSPOSE, alpha, B0, a1t, FLA_ONE, c10t );

    /* c21 = c21 + B2 * a1t' */
    FLA_Gemvc_external( FLA_NO_TRANSPOSE, FLA_CONJUGATE, alpha, B2, a1t, FLA_ONE, c21 );    

    /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */
    FLA_Dot2cs_external( FLA_CONJUGATE, alpha, a1t, b1t, FLA_ONE, gamma11 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                            /* ** */           /* ** */
                                                  a1t, 
                              &AB,                A2,     FLA_BOTTOM );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0, 
                            /* ** */           /* ** */
                                                  b1t, 
                              &BB,                B2,     FLA_BOTTOM );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00,  /**/ c01,     C02,
                            /* ************** */  /* ************************ */
                                                     c10t, /**/ gamma11, c12t,
                              &CBL, /**/ &CBR,       C20,  /**/ c21,     C22,
                              FLA_BR );

  }

  return FLA_SUCCESS;
}
開發者ID:pgawron,項目名稱:tlash,代碼行數:78,代碼來源:FLA_Her2k_ln_unb_var6.c

示例4: FLA_UDdate_UT_blk_var2

FLA_Error FLA_UDdate_UT_blk_var2( FLA_Obj R,
                                  FLA_Obj C,
                                  FLA_Obj D, FLA_Obj T, fla_uddateut_t* cntl )
{
    FLA_Obj CT,              C0,
            CB,              C1,
            C2;

    FLA_Obj DT,              D0,
            DB,              D1,
            D2;

    FLA_Obj TT,              T0,
            TB,              T1,
            T2;

    dim_t   b_C, b_D, b_T;

    FLA_Part_2x1( C,    &CT,
                  &CB,            0, FLA_TOP );

    FLA_Part_2x1( D,    &DT,
                  &DB,            0, FLA_TOP );

    FLA_Part_2x1( T,    &TT,
                  &TB,            0, FLA_TOP );

    while ( FLA_Obj_length( CT ) < FLA_Obj_length( C ) &&
            FLA_Obj_length( DT ) < FLA_Obj_length( D ) ) {

        b_C = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
        b_D = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
        b_T = FLA_Determine_blocksize( TB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

        FLA_Repart_2x1_to_3x1( CT,                &C0,
                               /* ** */          /* ****** */
                               &C1,
                               CB,                &C2,        b_C, FLA_BOTTOM );

        FLA_Repart_2x1_to_3x1( DT,                &D0,
                               /* ** */          /* ****** */
                               &D1,
                               DB,                &D2,        b_D, FLA_BOTTOM );

        FLA_Repart_2x1_to_3x1( TT,                &T0,
                               /* ** */          /* ****** */
                               &T1,
                               TB,                &T2,        b_T, FLA_BOTTOM );

        /*------------------------------------------------------------*/

        /*
           Perform an up/downdate of the upper triangular Cholesky factor R via
           "UD" UT Householder transformations:

             [ R, ...
               C1, ...
               D1, T1 ] = FLA_UDdate_UT( R, ...
                                         C1, ...
                                         D1, T1 );

           by updating R in such a way that removes the contributions of the rows
           in D1 while simultaneously adding new contributions to the factorization
           from the rows of C1. Note that C1 and D1 are also updated in the process.
           Also note that either C1 or D1 may become empty at any iteration.
        */

        FLA_UDdate_UT_internal( R,
                                C1,
                                D1, T1,
                                FLA_Cntl_sub_uddateut( cntl ) );

        /*------------------------------------------------------------*/

        FLA_Cont_with_3x1_to_2x1( &CT,                C0,
                                  C1,
                                  /* ** */          /* ****** */
                                  &CB,                C2,     FLA_TOP );

        FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                  D1,
                                  /* ** */          /* ****** */
                                  &DB,                D2,     FLA_TOP );

        FLA_Cont_with_3x1_to_2x1( &TT,                T0,
                                  T1,
                                  /* ** */          /* ****** */
                                  &TB,                T2,     FLA_TOP );
    }

    return FLA_SUCCESS;
}
開發者ID:fmarrabal,項目名稱:libflame,代碼行數:92,代碼來源:FLA_UDdate_UT_blk_var2.c

示例5: Symm_unb_var6

int Symm_unb_var6( FLA_Obj A, FLA_Obj B, FLA_Obj C )
{
  FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
          ABL,   ABR,      a10t, alpha11, a12t,
          A20,  a21,     A22;

  FLA_Obj BT,              B0,
          BB,              b1t,
          B2;

  FLA_Obj CT,              C0,
          CB,              c1t,
          C2;

  FLA_Part_2x2( A,    &ATL, &ATR,
      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( B,    &BT, 
      &BB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT, 
      &CB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
        /* ************* */   /* ************************** */
        &a10t, /**/ &alpha11, &a12t,
        ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
        1, 1, FLA_BR );

    FLA_Repart_2x1_to_3x1( BT,                &B0, 
        /* ** */            /* *** */
        &b1t, 
        BB,                &B2,        1, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
        /* ** */            /* *** */
        &c1t, 
        CB,                &C2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/
    
    // c1t = c1t + a10t*B0 + alpha11*b1t + a21t*B2;
    FLA_Gemv(FLA_TRANSPOSE, FLA_ONE, B0, a10t, FLA_ONE, c1t);
    FLA_Gemv(FLA_TRANSPOSE, FLA_ONE, B2, a21, FLA_ONE, c1t);
    FLA_Axpy(alpha11, b1t, c1t);

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
        a10t, alpha11, /**/ a12t,
        /* ************** */  /* ************************ */
        &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
        FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0, 
        b1t, 
        /* ** */           /* *** */
        &BB,                B2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
        c1t, 
        /* ** */           /* *** */
        &CB,                C2,     FLA_TOP );

  }

  return FLA_SUCCESS;
}
開發者ID:ebeweber,項目名稱:CS378-Invariant-Project,代碼行數:70,代碼來源:Symm_unb_var6.c

示例6: FLA_Syrk_ln_omp2l_var3

FLA_Error FLA_Syrk_ln_omp2l_var3( FLA_Obj A, FLA_Obj C )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj CTL,   CTR,      C00, C01, C02, 
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  int b;

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_BOTTOM );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  #pragma intel omp parallel taskq
  {
  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){

    b = FLA_Task_compute_blocksize( 0, A, AB, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                                              &A1, 
                        /* ** */            /* ** */
                           AB,                &A2,        b, FLA_TOP );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    #pragma intel omp task captureprivate(A1, A2, C21)
    {
    /* C21 = C21 + A2 * A1' */
    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A2, A1, FLA_ONE, C21 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                            /* ** */           /* ** */
                                                  A1, 
                              &AB,                A2,     FLA_BOTTOM );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );

  }

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_BOTTOM );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){

    b = FLA_Task_compute_blocksize( 0, A, AB, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                                              &A1, 
                        /* ** */            /* ** */
                           AB,                &A2,        b, FLA_TOP );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    #pragma intel omp task captureprivate(C11, A1)
    {
    /* C11 = C11 + A1 * A1' */
    FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A1, FLA_ONE, C11 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                            /* ** */           /* ** */
                                                  A1, 
                              &AB,                A2,     FLA_BOTTOM );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );

//.........這裏部分代碼省略.........
開發者ID:pgawron,項目名稱:tlash,代碼行數:101,代碼來源:FLA_Syrk_ln_omp2l_var3.c

示例7: FLA_Trsv_un_blk_var1

FLA_Error FLA_Trsv_un_blk_var1( FLA_Diag diagA, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02, 
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj xT,              x0,
          xB,              x1,
                           x2;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x1( x,    &xT, 
                      &xB,            0, FLA_BOTTOM );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){

    b = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    FLA_Repart_2x1_to_3x1( xT,                &x0, 
                                              &x1, 
                        /* ** */            /* ** */
                           xB,                &x2,        b, FLA_TOP );

    /*------------------------------------------------------------*/

    /* x1 = x1 - A12 * x2; */
    FLA_Gemv_internal( FLA_NO_TRANSPOSE, 
                       FLA_MINUS_ONE, A12, x2, FLA_ONE, x1,
                       FLA_Cntl_sub_gemv( cntl ) );

    /* x1 = triu( A11 ) \ x1; */
    FLA_Trsv_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diagA,
                       A11, x1,
                       FLA_Cntl_sub_trsv( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x1_to_2x1( &xT,                x0, 
                            /* ** */           /* ** */
                                                  x1, 
                              &xB,                x2,     FLA_BOTTOM );

  }

  return FLA_SUCCESS;
}
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:62,代碼來源:FLA_Trsv_un_blk_var1.c

示例8: FLA_Apply_QUD_UT_lhfc_blk_var1

FLA_Error FLA_Apply_QUD_UT_lhfc_blk_var1( FLA_Obj T, FLA_Obj W,
                                                     FLA_Obj R,
                                          FLA_Obj U, FLA_Obj C,
                                          FLA_Obj V, FLA_Obj D, fla_apqudut_t* cntl )
{
  FLA_Obj TL,    TR,       T0,  T1,  T2;

  FLA_Obj UL,    UR,       U0,  U1,  U2;

  FLA_Obj VL,    VR,       V0,  V1,  V2;

  FLA_Obj RT,              R0,
          RB,              R1,
                           R2;

  FLA_Obj T1T,
          T1B;

  FLA_Obj W1TL,  W1TR,
          W1BL,  W1BR;

  dim_t   b_alg, b;

  // Query the algorithmic blocksize by inspecting the length of T.
  b_alg = FLA_Obj_length( T );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  FLA_Part_1x2( U,    &UL,  &UR,      0, FLA_LEFT );

  FLA_Part_1x2( V,    &VL,  &VR,      0, FLA_LEFT );

  FLA_Part_2x1( R,    &RT, 
                      &RB,            0, FLA_TOP );

  while ( FLA_Obj_width( UL ) < FLA_Obj_width( U ) ){

    b = min( b_alg, FLA_Obj_width( UR ) );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( UL,  /**/ UR,        &U0, /**/ &U1, &U2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( VL,  /**/ VR,        &V0, /**/ &V1, &V2,
                           b, FLA_RIGHT );

    FLA_Repart_2x1_to_3x1( RT,                &R0, 
                        /* ** */            /* ** */
                                              &R1, 
                           RB,                &R2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    FLA_Part_2x1( T1,    &T1T,
                         &T1B,    b, FLA_TOP );

    FLA_Part_2x2( W,     &W1TL, &W1TR,
                         &W1BL, &W1BR,     b, FLA_Obj_width( R1 ), FLA_TL );

    // W1TL = R1;

    FLA_Copyt_internal( FLA_NO_TRANSPOSE, R1, W1TL,
                        FLA_Cntl_sub_copyt( cntl ) );

    // W1TL = inv( triu( T1T ) )' * ( R1 + U1' * C + V1' * D );

    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, 
                       FLA_ONE, U1, C, FLA_ONE, W1TL,
                       FLA_Cntl_sub_gemm1( cntl ) );

    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, 
                       FLA_ONE, V1, D, FLA_ONE, W1TL,
                       FLA_Cntl_sub_gemm2( cntl ) );

    FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                       FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, T1T, W1TL,
                       FLA_Cntl_sub_trsm( cntl ) );

    // R1 = R1 - W1TL;
    // C  = C  - U1 * W1TL;
    // D  = D  + V1 * W1TL;

    FLA_Axpyt_internal( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, W1TL, R1,
                        FLA_Cntl_sub_axpyt( cntl ) );

    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_MINUS_ONE, U1, W1TL, FLA_ONE, C,
                       FLA_Cntl_sub_gemm3( cntl ) );

    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_ONE, V1, W1TL, FLA_ONE, D,
                       FLA_Cntl_sub_gemm4( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ T2,
                              FLA_LEFT );
//.........這裏部分代碼省略.........
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:101,代碼來源:FLA_Apply_QUD_UT_lhfc_blk_var1.c

示例9: Symm_blk_var8

int Symm_blk_var8( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02, 
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  int b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( B,    &BT, 
                      &BB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT, 
                      &CB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){

    b = min( FLA_Obj_length( ABR ), nb_alg );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );

    FLA_Repart_2x1_to_3x1( BT,                &B0, 
                        /* ** */            /* ** */
                                              &B1, 
                           BB,                &B2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
                        /* ** */            /* ** */
                                              &C1, 
                           CB,                &C2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // C0 = C0 + A10^T*B1;
    FLA_Gemm(FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A10, B1, FLA_ONE, C0);
    
    // C1 = C1 + A10*B0 + A11*B1;
    FLA_Gemm(FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A10, B0, FLA_ONE, C1);
    // FLA_Gemm(FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A11, B1, FLA_ONE, C1);
    FLA_Symm(FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A11, B1, FLA_ONE, C1);

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0, 
                                                  B1, 
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
                                                  C1, 
                            /* ** */           /* ** */
                              &CB,                C2,     FLA_TOP );

  }

  return FLA_SUCCESS;
}
開發者ID:ebeweber,項目名稱:CS378-Invariant-Project,代碼行數:77,代碼來源:Symm_blk_var8.c

示例10: FLA_Gemm_pp_nn_var1

FLA_Error FLA_Gemm_pp_nn_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  FLA_Obj packed_C1;

  int b;

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT, 
                      &CB,            0, FLA_TOP );

  /* Initialize the FLA_Gemm() interface to the kernel environment
     Note: the blocksize given to the kernel environment can be non-
     square. We pass the m and n dimensions of the blocksize here. */

  FLA_Gemm_init( nb_alg, FLA_Obj_width( A ) );

  /* Pack B */
  /* Note: the idea here is that, optionally,
     -  B is packed, and/or
     -  B is scaled                                    
     If B needs not be packed, it is not packed.  If the multiplication
     by alpha happens elsewhere, no scaling occurs.
     The "NoTranspose" means that in the version of gemm being updated
     B is not transposed.  In packing, B could be transposed, if there
     is an advantage to this.  So, the "NoTranspose" means that input
     B is not transposed in the FLA_Gemm call.  */
 
  FLA_Gemm_pack_andor_scale_B( FLA_NO_TRANSPOSE, alpha, B );

  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){

    b = min( FLA_Obj_length( AB ), nb_alg );

    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                        /* ** */            /* ** */
                                              &A1, 
                           AB,                &A2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
                        /* ** */            /* ** */
                                              &C1, 
                           CB,                &C2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* C1 = alpha * A1 * B + C1; */

    /* Pack C1 */
    /* Note: the idea here is that, optionally,
       -  packed space is provided for computing packed_C1 = alpha * A1 * B 
       If C1 needs not be packed, then C can just be returned by this routine. */
    FLA_Gemm_pack_C( FLA_NO_TRANSPOSE, C1 );

    /* Pack A */
    /* Note: the idea here is that, optionally,
        -  A is packed, and/or
        -  A is scaled                                    
	If A needs not be packed, it is not packed.  If the multiplication
	by alpha happens elsewhere, no scaling occurs. */
    FLA_Gemm_pack_andor_scale_A( FLA_NO_TRANSPOSE, alpha, A1 );

    /* Call the kernel routine */
    FLA_Gemm_kernel( alpha, A1, B, C1 );

    /* Unpack C1 */
    /* Note: the idea here is that, optionally,
       -  packed_C1 is added to C1, possibly scaled at this point. */
    FLA_Gemm_unpack_andor_scale_C( FLA_NO_TRANSPOSE, alpha, C1 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                                                  A1, 
                            /* ** */           /* ** */
                              &AB,                A2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
                                                  C1, 
                            /* ** */           /* ** */
                              &CB,                C2,     FLA_TOP );

  }

  /* Release the space used to pack A1 */
  /* Note: notice that the space provided for A1 can be recycled 
     everytime through the loop, which is why this call is outside the loop. 
     If the space is statically allocated, or A1 was not packed,
     this could be a no-op. */
  FLA_Gemm_release_pack_A( FLA_NO_TRANSPOSE, A1 );

//.........這裏部分代碼省略.........
開發者ID:pgawron,項目名稱:tlash,代碼行數:101,代碼來源:FLA_Gemm_pp_nn_var1.c

示例11: FLA_Syr2k_un_unb_var4

FLA_Error FLA_Syr2k_un_unb_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
    FLA_Obj AT,              A0,
            AB,              a1t,
            A2;

    FLA_Obj BT,              B0,
            BB,              b1t,
            B2;

    FLA_Obj CTL,   CTR,      C00,  c01,     C02,
            CBL,   CBR,      c10t, gamma11, c12t,
            C20,  c21,     C22;


    FLA_Scalr_external( FLA_UPPER_TRIANGULAR, beta, C );

    FLA_Part_2x1( A,    &AT,
                  &AB,            0, FLA_TOP );

    FLA_Part_2x1( B,    &BT,
                  &BB,            0, FLA_TOP );

    FLA_Part_2x2( C,    &CTL, &CTR,
                  &CBL, &CBR,     0, 0, FLA_TL );

    while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ) {


        FLA_Repart_2x1_to_3x1( AT,                &A0,
                               /* ** */            /* ** */
                               &a1t,
                               AB,                &A2,        1, FLA_BOTTOM );

        FLA_Repart_2x1_to_3x1( BT,                &B0,
                               /* ** */            /* ** */
                               &b1t,
                               BB,                &B2,        1, FLA_BOTTOM );

        FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00,  /**/ &c01,     &C02,
                               /* ************* */   /* ************************** */
                               &c10t, /**/ &gamma11, &c12t,
                               CBL, /**/ CBR,       &C20,  /**/ &c21,     &C22,
                               1, 1, FLA_BR );

        /*------------------------------------------------------------*/

        /* c01 = c01 + A0 * b1t' */
        FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, FLA_ONE, c01 );

        /* c01 = c01 + B0 * a1t' */
        FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, FLA_ONE, c01 );

        /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */
        FLA_Dot2s_external( alpha, a1t, b1t, FLA_ONE, gamma11 );

        /*------------------------------------------------------------*/

        FLA_Cont_with_3x1_to_2x1( &AT,                A0,
                                  a1t,
                                  /* ** */           /* ** */
                                  &AB,                A2,     FLA_TOP );

        FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                                  b1t,
                                  /* ** */           /* ** */
                                  &BB,                B2,     FLA_TOP );

        FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00,  c01,     /**/ C02,
                                  c10t, gamma11, /**/ c12t,
                                  /* ************** */  /* ************************ */
                                  &CBL, /**/ &CBR,       C20,  c21,     /**/ C22,
                                  FLA_TL );

    }

    return FLA_SUCCESS;
}
開發者ID:pgawron,項目名稱:tlash,代碼行數:78,代碼來源:FLA_Syr2k_un_unb_var4.c

示例12: FLASH_Axpy_hierarchy

FLA_Error FLASH_Axpy_hierarchy( int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj* H )
{
	// Once we get down to a submatrix whose elements are scalars, we are down
	// to our base case.
	if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR )
	{
		// Depending on which top-level function invoked us, we either axpy
		// the source data in the flat matrix to the leaf-level submatrix of
		// the hierarchical matrix, or axpy the data in the hierarchical
		// submatrix to the flat matrix.
		if      ( direction == FLA_FLAT_TO_HIER )
		{
#ifdef FLA_ENABLE_SCC
			if ( FLA_is_owner() )
#endif
			FLA_Axpy_external( alpha, F, *H );
		}
		else if ( direction == FLA_HIER_TO_FLAT )
		{
#ifdef FLA_ENABLE_SCC
			if ( FLA_is_owner() )
#endif
			FLA_Axpy_external( alpha, *H, F );
		}
	}
	else
	{
		FLA_Obj HL,  HR,       H0,  H1,  H2;
		FLA_Obj FL,  FR,       F0,  F1,  F2;

		FLA_Obj H1T,           H01,
		        H1B,           H11,
		                       H21;
		FLA_Obj F1T,           F01,
		        F1B,           F11,
		                       F21;

		dim_t b_m;
		dim_t b_n;

		FLA_Part_1x2( *H,    &HL,  &HR,      0, FLA_LEFT );
		FLA_Part_1x2(  F,    &FL,  &FR,      0, FLA_LEFT );

		while ( FLA_Obj_width( HL ) < FLA_Obj_width( *H ) )
		{
			FLA_Repart_1x2_to_1x3( HL,  /**/ HR,        &H0, /**/ &H1, &H2,
			                       1, FLA_RIGHT );

			// Get the scalar width of H1 and use that to determine the
			// width of F1.
			b_n = FLASH_Obj_scalar_width( H1 );

			FLA_Repart_1x2_to_1x3( FL,  /**/ FR,        &F0, /**/ &F1, &F2,
			                       b_n, FLA_RIGHT );

			// -------------------------------------------------------------

			FLA_Part_2x1( H1,    &H1T,
			                     &H1B,       0, FLA_TOP );
			FLA_Part_2x1( F1,    &F1T,
			                     &F1B,       0, FLA_TOP );

			while ( FLA_Obj_length( H1T ) < FLA_Obj_length( H1 ) )
			{
				FLA_Repart_2x1_to_3x1( H1T,               &H01,
				                    /* ** */            /* *** */
				                                          &H11,
				                       H1B,               &H21,        1, FLA_BOTTOM );

				// Get the scalar length of H11 and use that to determine the
				// length of F11.
				b_m = FLASH_Obj_scalar_length( H11 );

				FLA_Repart_2x1_to_3x1( F1T,               &F01,
				                    /* ** */            /* *** */
				                                          &F11,
				                       F1B,               &F21,        b_m, FLA_BOTTOM );
				// -------------------------------------------------------------

				// Recursively axpy between F11 and H11.
				FLASH_Axpy_hierarchy( direction, alpha, F11,
				                      FLASH_OBJ_PTR_AT( H11 ) );

				// -------------------------------------------------------------

				FLA_Cont_with_3x1_to_2x1( &H1T,               H01,
				                                              H11,
				                        /* ** */           /* *** */
				                          &H1B,               H21,     FLA_TOP );
				FLA_Cont_with_3x1_to_2x1( &F1T,               F01,
				                                              F11,
				                        /* ** */           /* *** */
				                          &F1B,               F21,     FLA_TOP );
			}

			// -------------------------------------------------------------

			FLA_Cont_with_1x3_to_1x2( &HL,  /**/ &HR,        H0, H1, /**/ H2,
			                          FLA_LEFT );
			FLA_Cont_with_1x3_to_1x2( &FL,  /**/ &FR,        F0, F1, /**/ F2,
//.........這裏部分代碼省略.........
開發者ID:pgawron,項目名稱:tlash,代碼行數:101,代碼來源:FLASH_Axpy_other.c

示例13: FLA_Sylv_nh_blk_var16

FLA_Error FLA_Sylv_nh_blk_var16( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02, 
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x1( C,    &CT, 
                      &CB,            0, FLA_BOTTOM );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){

    b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
                                              &C1, 
                        /* ** */            /* ** */
                           CB,                &C2,        b, FLA_TOP );

    // Loop Invariant:
    // CT = CT - ATR * sylv( ABR, B', CB )
    // CB = sylv( ABR, B', CB )

    /*------------------------------------------------------------*/

    // C1 = sylv( A11, B', C1 );
    FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, 
                       isgn, A11, B, C1, scale,
                       FLA_Cntl_sub_sylv1( cntl ) );

    // C0 = C0 - A01 * C1;
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_MINUS_ONE, A01, C1, FLA_ONE, C0,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
                            /* ** */           /* ** */
                                                  C1, 
                              &CB,                C2,     FLA_BOTTOM );

  }

  return FLA_SUCCESS;
}
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:66,代碼來源:FLA_Sylv_nh_blk_var16.c

示例14: Symm_blk_var1

int Symm_blk_var1( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02, 
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  int b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( B,    &BT, 
                      &BB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT, 
                      &CB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){

    b = min( FLA_Obj_length( ABR ), nb_alg );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );

    FLA_Repart_2x1_to_3x1( BT,                &B0, 
                        /* ** */            /* ** */
                                              &B1, 
                           BB,                &B2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
                        /* ** */            /* ** */
                                              &C1, 
                           CB,                &C2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/
    C1 = C1 + A10*B0 + A11*B1;
	C0 = C0 + A10*B1;
    /*                       update line 1                        */
    /*                             :                              */
    /*                       update line n                        */

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0, 
                                                  B1, 
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
                                                  C1, 
                            /* ** */           /* ** */
                              &CB,                C2,     FLA_TOP );

  }

  return FLA_SUCCESS;
}
開發者ID:ebeweber,項目名稱:CS378-Invariant-Project,代碼行數:74,代碼來源:Symm_blk_var1.c

示例15: FLA_Syrk_un_blk_var3

FLA_Error FLA_Syrk_un_blk_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj CTL,   CTR,      C00, C01, C02, 
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  dim_t b;

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_BOTTOM );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){

    b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                                              &A1, 
                        /* ** */            /* ** */
                           AB,                &A2,        b, FLA_TOP );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    /* C12 = C12 + A1 * A2' */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, 
                       alpha, A1, A2, beta, C12,
                       FLA_Cntl_sub_gemm( cntl ) );

    /* C11 = C11 + A1 * A1' */
    FLA_Syrk_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, 
                       alpha, A1, beta, C11,
                       FLA_Cntl_sub_syrk( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                            /* ** */           /* ** */
                                                  A1, 
                              &AB,                A2,     FLA_BOTTOM );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );

  }

  return FLA_SUCCESS;
}
開發者ID:anaptyxis,項目名稱:libflame,代碼行數:62,代碼來源:FLA_Syrk_un_blk_var3.c


注:本文中的FLA_Repart_2x1_to_3x1函數示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。