当前位置: 首页>>代码示例>>C++>>正文


C++ Timer::reset方法代码示例

本文整理汇总了C++中Kokkos::Impl::Timer::reset方法的典型用法代码示例。如果您正苦于以下问题:C++ Timer::reset方法的具体用法?C++ Timer::reset怎么用?C++ Timer::reset使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Kokkos::Impl::Timer的用法示例。


在下文中一共展示了Timer::reset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: Loop

void Loop(int loop, int test, const char* type_name) {
  LoopVariant<T>(loop,test);

  Kokkos::Impl::Timer timer;
  T res = LoopVariant<T>(loop,test);
  double time1 = timer.seconds();

  timer.reset();
  T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
  double time2 = timer.seconds();

  timer.reset();
  T resSerial = LoopVariantSerial<T>(loop,test);
  double time3 = timer.seconds();

  time1*=1e6/loop;
  time2*=1e6/loop;
  time3*=1e6/loop;
  textcolor_standard();
  bool passed = true;
  if(resSerial!=res) passed = false;
  if(!passed) textcolor(RESET,BLACK,YELLOW);
  printf("%s Test %i %s  --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
  if(!passed) textcolor_standard();
  printf("\n");
}
开发者ID:UoB-HPC,项目名称:TeaLeaf-Kokkos,代码行数:26,代码来源:test_atomic.cpp

示例2: test_global_to_local_ids

size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations)
{

  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;

  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  double elasped_time = 0;
  Kokkos::Impl::Timer timer;

  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local(capacity);

  int shiftw = 15;

  //create
  elasped_time = timer.seconds();
  std::cout << std::setw(shiftw) <<  "allocate: " <<  elasped_time << std::endl;
  timer.reset();

  // generate unique ids
  {
    generate_ids<Device> gen(local_2_global);
  }

  // generate
  elasped_time = timer.seconds();
  std::cout << std::setw(shiftw) << "generate: " <<  elasped_time << std::endl;
  timer.reset();

  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }

  // fill
  elasped_time = timer.seconds();
  std::cout << std::setw(shiftw) << "fill: " <<  elasped_time << std::endl;
  timer.reset();


  size_t num_errors = global_2_local.failed_insert();

  if (num_errors == 0u) {
    for (unsigned i=0; i<num_find_iterations; ++i)
    {
      find_test<Device> find(global_2_local, local_2_global,num_errors);
    }

    // find
    elasped_time = timer.seconds();
    std::cout << std::setw(shiftw) << "lookup: " <<  elasped_time << std::endl;
  }
  else {
    std::cout << "    !!! Fill Failed !!!" << std::endl;
  }

  return num_errors;
}
开发者ID:BrianMoths,项目名称:lammps,代码行数:60,代码来源:G2L.hpp

示例3: main

// DualView example: fills an index table on the host, then times the localsum
// functor twice on the device and twice on the host mirror device, printing
// the first ("with Sync") and second ("without Sync") time for each.
int main(int narg, char* arg[]) {
  Kokkos::initialize(narg,arg);

  // Everything that owns a view lives in this inner scope so that it is
  // destroyed before Kokkos::finalize() runs; views must not outlive finalize.
  {
    int size = 1000000;

    // Create DualViews. This will allocate on both the device and its
    // host_mirror_device.
    idx_type idx("Idx",size,64);
    view_type dest("Dest",size);
    view_type src("Src",size);

    // Fixed seed so the generated index pattern is reproducible.
    srand(134231);

    // Get a reference to the host view of idx directly (equivalent to
    // idx.view<idx_type::host_mirror_device_type>() )
    idx_type::t_host h_idx = idx.h_view;
    for (int i = 0; i < size; ++i) {
      for (view_type::size_type j=0; j < h_idx.dimension_1 (); ++j) {
        // Neighbour index within +/-250 of i, wrapped into [0, size).
        h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
      }
    }

    // Mark idx as modified on the host_mirror_device_type so that a
    // sync to the device will actually move data.  The sync happens in
    // the functor's constructor.
    idx.modify<idx_type::host_mirror_device_type>();

    // Run on the device.  This will cause a sync of idx to the device,
    // since it was marked as modified on the host.
    Kokkos::Impl::Timer timer;
    Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
    Kokkos::fence();
    double sec1_dev = timer.seconds();

    timer.reset();
    Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
    Kokkos::fence();
    double sec2_dev = timer.seconds();

    // Run on the host (could be the same as device).  This will cause a
    // sync back to the host of dest.  Note that if the Device is CUDA,
    // the data layout will not be optimal on host, so performance is
    // lower than what it would be for a pure host compilation.
    timer.reset();
    Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
    Kokkos::fence();
    double sec1_host = timer.seconds();

    timer.reset();
    Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
    Kokkos::fence();
    double sec2_host = timer.seconds();

    printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
    printf("Host   Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
  }

  Kokkos::finalize();
}
开发者ID:rorypeck,项目名称:trilinos,代码行数:58,代码来源:dual_view.cpp

示例4: test_global_to_local_ids

void test_global_to_local_ids(unsigned num_ids)
{

  typedef Device device_type;
  typedef typename device_type::size_type size_type;

  typedef Kokkos::View<uint32_t*,device_type> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,device_type> global_id_view;

  //size
  std::cout << num_ids << ", ";

  double elasped_time = 0;
  Kokkos::Impl::Timer timer;

  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local((3u*num_ids)/2u);

  //create
  elasped_time = timer.seconds();
  std::cout << elasped_time << ", ";
  timer.reset();

  // generate unique ids
  {
    generate_ids<Device> gen(local_2_global);
  }
  Device::fence();
  // generate
  elasped_time = timer.seconds();
  std::cout << elasped_time << ", ";
  timer.reset();

  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }
  Device::fence();

  // fill
  elasped_time = timer.seconds();
  std::cout << elasped_time << ", ";
  timer.reset();


  size_t num_errors = 0;
  for (int i=0; i<100; ++i)
  {
    find_test<Device> find(global_2_local, local_2_global,num_errors);
  }
  Device::fence();

  // find
  elasped_time = timer.seconds();
  std::cout << elasped_time << std::endl;

  ASSERT_EQ( num_errors, 0u);
}
开发者ID:yoshioda,项目名称:trilinos,代码行数:57,代码来源:TestGlobal2LocalIds.hpp

示例5: main

// UVM example: fills an index view directly from the host, then times the
// localsum functor twice on view_type::execution_space and twice on
// Kokkos::HostSpace::execution_space, printing the first ("with Sync") and
// second ("without Sync") time for each.
int main(int narg, char* arg[]) {
  Kokkos::initialize(narg,arg);

  // Everything that owns a view lives in this inner scope so that it is
  // destroyed before Kokkos::finalize() runs; views must not outlive finalize.
  {
    int size = 1000000;

    // Create Views
    idx_type idx("Idx",size,64);
    view_type dest("Dest",size);
    view_type src("Src",size);

    // Fixed seed so the generated index pattern is reproducible.
    srand(134231);

    // When using UVM Cuda views can be accessed on the Host directly
    for(int i=0; i<size; i++) {
      for(int j=0; j<idx.dimension_1(); j++)
        // Neighbour index within +/-250 of i, wrapped into [0, size).
        idx(i,j) = (size + i + (rand()%500 - 250))%size;
    }

    Kokkos::fence();
    // Run on the device
    // This will cause a sync of idx to the device since it was modified on the host
    Kokkos::Impl::Timer timer;
    Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
    Kokkos::fence();
    double sec1_dev = timer.seconds();

    // No data transfer will happen now, since nothing is accessed on the host
    timer.reset();
    Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
    Kokkos::fence();
    double sec2_dev = timer.seconds();

    // Run on the host
    // This will cause a sync back to the host of dest which was changed on the device
    // Compare runtime here with the dual_view example: dest will be copied back in 4k blocks
    // when they are accessed the first time during the parallel_for. Due to the latency of a memcpy
    // this gives lower effective bandwidth when doing a manual copy via dual views
    timer.reset();
    Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
    Kokkos::fence();
    double sec1_host = timer.seconds();

    // No data transfers will happen now
    timer.reset();
    Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
    Kokkos::fence();
    double sec2_host = timer.seconds();

    printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
    printf("Host   Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
  }

  Kokkos::finalize();
}
开发者ID:BrianMoths,项目名称:lammps,代码行数:55,代码来源:uvm_example.cpp

示例6: exampleCholByBlocks

  KOKKOS_INLINE_FUNCTION
  int exampleCholByBlocks(const string file_input,
                          const int nthreads,
                          const int max_task_dependence,
                          const int team_size,
                          const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
      Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;

    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
    typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;

    typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
    typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;

    typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;

    typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
    typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;
    double t = 0.0;

    cout << "CholByBlocks:: import input file = " << file_input << endl;
    CrsMatrixBaseType AA("AA");
    {
      timer.reset();

      ifstream in;
      in.open(file_input);
      if (!in.good()) {
        cout << "Failed in open the file: " << file_input << endl;
        return ++r_val;
      }
      AA.importMatrixMarket(in);

      t = timer.seconds();

      if (verbose)
        cout << AA << endl;
    }
    cout << "CholByBlocks:: import input file::time = " << t << endl;


    cout << "CholByBlocks:: reorder the matrix" << endl;
    CrsMatrixBaseType UU("UU");     // permuted base matrix
    CrsHierMatrixBaseType HU("HU"); // hierarchical matrix of views
    {
      timer.reset();

      typename GraphHelperType::size_type_array rptr(AA.Label()+"Graph::RowPtrArray", AA.NumRows() + 1);
      typename GraphHelperType::ordinal_type_array cidx(AA.Label()+"Graph::ColIndexArray", AA.NumNonZeros());

      AA.convertGraph(rptr, cidx);
      GraphHelperType S(AA.Label()+"ScotchHelper",
                        AA.NumRows(),
                        rptr,
                        cidx);
      S.computeOrdering();

      CrsMatrixBaseType PA("Permuted AA");
      PA.copy(S.PermVector(), S.InvPermVector(), AA);

      UU.copy(Uplo::Upper, PA);

      CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
                                 S.NumBlocks(),
                                 S.RangeVector(),
                                 S.TreeVector());

      for (ordinal_type k=0;k<HU.NumNonZeros();++k)
        HU.Value(k).fillRowViewArray();

      t = timer.seconds();

      if (verbose)
        cout << UU << endl;
    }
    cout << "CholByBlocks:: reorder the matrix::time = " << t << endl;

    const size_t max_concurrency = 16384;
    cout << "CholByBlocks:: max concurrency = " << max_concurrency << endl;

    const size_t max_task_size = 3*sizeof(CrsTaskViewType)+128;
    cout << "CholByBlocks:: max task size   = " << max_task_size << endl;

    typename TaskFactoryType::policy_type policy(max_concurrency,
                                                 max_task_size,
                                                 max_task_dependence,
                                                 team_size);

    TaskFactoryType::setMaxTaskDependence(max_task_dependence);
    TaskFactoryType::setPolicy(&policy);

//.........这里部分代码省略.........
开发者ID:agrippa,项目名称:Trilinos,代码行数:101,代码来源:example_chol_by_blocks.hpp

示例7: exampleSymbolicFactor

  // Example driver with three timed phases, each reported on cout:
  //   1. read the matrix from file_input via AA.importMatrixMarket,
  //   2. optionally (reorder == true) compute an ordering with the Scotch
  //      graph helper and build the permuted matrix PA, otherwise PA = AA,
  //   3. build the non-zero pattern UU with SymbolicFactorHelper.
  // Returns 0 on success, or 1 when the input file cannot be opened.
  KOKKOS_INLINE_FUNCTION
  int exampleSymbolicFactor(const string file_input,
                            const int treecut,
                            const int minblksize,
                            const int seed,
                            const int fill_level,
                            const int league_size,
                            const bool reorder,
                            const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
    typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
    typedef SymbolicFactorHelper<CrsMatrixBaseType> SymbolicFactorHelperType;

    int r_val = 0;

    Kokkos::Impl::Timer stopwatch;
    double elapsed = 0.0;

    // ---- Phase 1: import -------------------------------------------------
    cout << "SymbolicFactor:: import input file = " << file_input << endl;
    CrsMatrixBaseType AA("AA");
    {
      stopwatch.reset();

      ifstream matrix_file;
      matrix_file.open(file_input);
      if (!matrix_file.good()) {
        cout << "Failed in open the file: " << file_input << endl;
        return ++r_val;
      }
      AA.importMatrixMarket(matrix_file);
      elapsed = stopwatch.seconds();

      cout << "SymbolicFactor:: AA nnz = " << AA.NumNonZeros() << endl;
      if (verbose) {
        cout << AA << endl;
      }
    }
    cout << "SymbolicFactor:: import input file::time = " << elapsed << endl;

    // ---- Phase 2: reorder (optional) -------------------------------------
    CrsMatrixBaseType PA("Permuted AA");
    GraphHelperType S(AA, seed);
    if (!reorder) {
      // No reordering requested: use the input as-is and report zero time.
      PA = AA;

      elapsed = 0.0;
    } else {
      stopwatch.reset();

      S.computeOrdering(treecut, minblksize);

      PA.copy(S.PermVector(), S.InvPermVector(), AA);

      elapsed = stopwatch.seconds();

      if (verbose) {
        cout << S << endl;
        cout << PA << endl;
      }
    }
    cout << "SymbolicFactor:: reorder the matrix::time = " << elapsed << endl;


    // ---- Phase 3: symbolic factorization ---------------------------------
    CrsMatrixBaseType UU("UU");
    {
      stopwatch.reset();

      SymbolicFactorHelperType symbolic(PA, league_size);
      symbolic.createNonZeroPattern(fill_level, Uplo::Upper, UU);

      elapsed = stopwatch.seconds();

      cout << "SymbolicFactor:: UU nnz = " << UU.NumNonZeros() << endl;

      if (verbose) {
        cout << symbolic << endl;
        cout << UU << endl;
      }
    }
    cout << "SymbolicFactor:: factorize the matrix::time = " << elapsed << endl;

    return r_val;
  }
开发者ID:rainiscold,项目名称:trilinos,代码行数:84,代码来源:example_symbolic_factor.hpp

示例8: factor_inc_lvl

  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::factor_inc_lvl(Int option)
  {

    printf("Factor Inc Level Called \n");

    gn = A.ncol;
    gm = A.nrow;

    if(Options.btf == BASKER_TRUE)
      {

	//JDB: We can change this for the new inteface

	//call reference copy constructor
	gn = A.ncol;
	gm = A.nrow;
	A = BTF_A; 
	//printf("\n\n Switching A, newsize: %d \n",
	//   A.ncol);
	//printMTX("A_FACTOR.mtx", A);
      }
   

    //Spit into Domain and Sep
    //----------------------Domain-------------------------//
    #ifdef BASKER_KOKKOS

    //====TIMER==
    #ifdef BASKER_TIME
    Kokkos::Impl::Timer       timer;
    #endif
    //===TIMER===

    typedef Kokkos::TeamPolicy<Exe_Space>        TeamPolicy;

    if(btf_tabs_offset != 0)
      {

	kokkos_nfactor_domain_inc_lvl <Int,Entry,Exe_Space>
	  domain_nfactor(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1), 
			     domain_nfactor);
	Kokkos::fence();
    

	//=====Check for error======
	while(true)
	  {
	INT_1DARRAY thread_start;
	MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	init_value(thread_start, num_threads+1, 
		   (Int) BASKER_MAX_IDX);
	int nt = nfactor_domain_error(thread_start);
	if(nt == BASKER_SUCCESS)
	  {
	    break;
	  }
	else
	  {
	    printf("restart \n");
	    kokkos_nfactor_domain_remalloc <Int, Entry, Exe_Space>
	      diag_nfactor_remalloc(this, thread_start);
	    Kokkos::parallel_for(TeamPolicy(num_threads,1),
				 diag_nfactor_remalloc);
	    Kokkos::fence();
	  }
      }//end while
   

    //====TIMER===
    #ifdef BASKER_TIME
    printf("Time DOMAIN: %f \n", timer.seconds());
    timer.reset();
    #endif
    //====TIMER====
    

    #else// else basker_kokkos
    #pragma omp parallel
    {


    }//end omp parallel
    #endif //end basker_kokkos

      }
    //-------------------End--Domian--------------------------//

   
    //---------------------------Sep--------------------------//

    
    
    if(btf_tabs_offset != 0)
      {
        //for(Int l=1; l<=1; l++)
       for(Int l=1; l <= tree.nlvls; l++)
      {

//.........这里部分代码省略.........
开发者ID:cooperrc,项目名称:Trilinos,代码行数:101,代码来源:basker_nfactor_inc.hpp

示例9: exampleCholUnblocked

  // Example driver with two timed phases, each reported on cout:
  //   1. read the matrix from file_input and copy its upper part into UU,
  //   2. create a Chol team task selected by (algo, variant), spawn it and
  //      wait on the task policy.
  // Unsupported algo / variant values go through the ERROR macro.
  // Returns 0 on success, or 1 when the input file cannot be opened.
  KOKKOS_INLINE_FUNCTION
  int exampleCholUnblocked(const string file_input,
                           const int max_task_dependence,
                           const int team_size,
                           const int algo,
                           const int variant,
                           const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
    typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;

    typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
      Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;

    typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;

    int r_val = 0;

    Kokkos::Impl::Timer stopwatch;
    double elapsed = 0.0;

    // ---- Phase 1: import and extract the upper part ----------------------
    cout << "CholUnblocked:: import input file = " << file_input << endl;
    CrsMatrixBaseType AA("AA");
    CrsMatrixBaseType UU("UU");
    {
      stopwatch.reset();

      ifstream matrix_file;
      matrix_file.open(file_input);
      if (!matrix_file.good()) {
        cout << "Failed in open the file: " << file_input << endl;
        return ++r_val;
      }
      AA.importMatrixMarket(matrix_file);

      UU.copy(Uplo::Upper, AA);

      elapsed = stopwatch.seconds();

      if (verbose) {
        cout << UU << endl;
      }
    }
    cout << "CholUnblocked:: import input file::time = " << elapsed << endl;

    // The policy constructor arguments depend on a compile-time switch.
#ifdef __USE_FIXED_TEAM_SIZE__
    typename TaskFactoryType::policy_type policy(max_task_dependence);
#else
    typename TaskFactoryType::policy_type policy(max_task_dependence, team_size);
#endif
    TaskFactoryType::setMaxTaskDependence(max_task_dependence);
    TaskFactoryType::setPolicy(&policy);

    // ---- Phase 2: factorize ----------------------------------------------
    cout << "CholUnblocked:: factorize the matrix" << endl;
    CrsTaskViewType U(&UU);
    U.fillRowViewArray();
    {
      stopwatch.reset();

      typename TaskFactoryType::future_type root_task;
      switch (algo) {
      case AlgoChol::UnblockedOpt: {
        if (variant == Variant::One) {
          root_task = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::One>
                                                            ::TaskFunctor<CrsTaskViewType>(U), 0);
        } else if (variant == Variant::Two) {
          root_task = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::Two>
                                                            ::TaskFunctor<CrsTaskViewType>(U), 0);
        } else {
          ERROR(">> Not supported algorithm variant");
        }
        break;
      }
      case AlgoChol::Dummy: {
        root_task = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::Dummy>
                                                          ::TaskFunctor<CrsTaskViewType>(U), 0);
        break;
      }
      default:
        ERROR(">> Not supported algorithm");
        break;
      }

      // Launch the task and block until the policy reports completion.
      TaskFactoryType::Policy().spawn(root_task);
      Kokkos::Experimental::wait(TaskFactoryType::Policy());

      elapsed = stopwatch.seconds();

      if (verbose) {
        cout << UU << endl;
      }
    }
    cout << "CholUnblocked:: factorize the matrix::time = " << elapsed << endl;

    return r_val;
  }
开发者ID:ChiahungTai,项目名称:Trilinos,代码行数:95,代码来源:example_chol_unblocked.hpp

示例10: exampleMatrixMarket

  int exampleMatrixMarket(const std::string file_input,
                          const bool verbose) {
    typedef typename
      Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;

    const bool detail = false;
    std::cout << "DeviceSpace::  "; DeviceSpaceType::print_configuration(std::cout, detail);
    std::cout << "HostSpace::    ";   HostSpaceType::print_configuration(std::cout, detail);
  
    typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType>   CrsMatrixBaseHostType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;
    
    CrsMatrixBaseHostType AA("AA");
    timer.reset();
    {
      std::ifstream in;
      in.open(file_input);
      if (!in.good()) {
        std::cout << "Failed in open the file: " << file_input << std::endl;
        return -1;
      }
      MatrixMarket::read(AA, in);
    }
    double t_read = timer.seconds();
    
    timer.reset();
    {
      std::string file_output = "mm-test-output.mtx";
      std::ofstream out;
      out.open(file_output);
      if (!out.good()) {
        std::cout << "Failed in open the file: " << file_output << std::endl;
        return -1;
      }
      MatrixMarket::write(out, AA, "%% Test output");
    }
    double t_write = timer.seconds();

    {
      const auto prec = std::cout.precision();
      std::cout.precision(4);

      std::cout << std::scientific
                << "MatrixMarket:: dimension = " << AA.NumRows() << " x " << AA.NumCols() 
                << ", " << " nnz = " << AA.NumNonZeros() << ", "
                << "read = " << t_read << " [sec], "
                << "write = " << t_write << " [sec] "
                << std::endl;

      std::cout.unsetf(std::ios::scientific);
      std::cout.precision(prec);
    }
    
    if (verbose) {
      AA.showMe(std::cout) << std::endl;
    }

    CrsMatrixBaseHostType BB("BB");
    BB.createConfTo(AA);

    CrsMatrixTools::copy(BB, Uplo::Upper, 0, AA);
    if (verbose) {
      BB.setLabel("Copy::AA:Upper::0"); BB.showMe(std::cout) << std::endl;
    }

    CrsMatrixTools::copy(BB, Uplo::Upper, 1, AA);
    if (verbose) {
      BB.setLabel("Copy::AA:Upper::1"); BB.showMe(std::cout) << std::endl;    
    }

    CrsMatrixTools::copy(BB, Uplo::Lower, 0, AA);
    if (verbose) {
      BB.setLabel("Copy::AA:Lower::0"); BB.showMe(std::cout) << std::endl;    
    }

    CrsMatrixTools::copy(BB, Uplo::Lower, 1, AA);
    if (verbose) {
      BB.setLabel("Copy::AA:Lower::1"); BB.showMe(std::cout) << std::endl;    
    }

    return r_val;
  }
开发者ID:agrippa,项目名称:Trilinos,代码行数:85,代码来源:Tacho_ExampleMatrixMarket.hpp

示例11: exampleTriSolvePerformance

  KOKKOS_INLINE_FUNCTION
  int exampleTriSolvePerformance(const string file_input,
                                 const OrdinalType nrhs,
                                 const OrdinalType nb,
                                 const int niter,
                                 const int nthreads,
                                 const int max_task_dependence,
                                 const int team_size, 
                                 const bool team_interface,
                                 const bool skip_serial,
                                 const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
      Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;

    typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
    typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;

    typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
    typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
    
    typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;

    typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
    typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;

    typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;

    typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
    typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;

    typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;

    typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
    typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;
    double 
      t_import = 0.0,
      t_reorder = 0.0,
      t_solve_seq = 0.0,
      t_solve_task = 0.0;
    const int start = -2;

    cout << "TriSolvePerformance:: import input file = " << file_input << endl;
    CrsMatrixBaseType AA("AA");
    {
      timer.reset();

      ifstream in;
      in.open(file_input);
      if (!in.good()) {
        cout << "Failed in open the file: " << file_input << endl;
        return ++r_val;
      }
      AA.importMatrixMarket(in);

      t_import = timer.seconds();

      if (verbose)
        cout << AA << endl;
    }
    cout << "TriSolvePerformance:: import input file::time = " << t_import << endl;

    CrsMatrixBaseType   UU("UU");
    DenseMatrixBaseType BB("BB",  AA.NumRows(), nrhs);

    cout << "TriSolvePerformance:: reorder the matrix and partition right hand side, nb = " << nb << endl;
    CrsHierMatrixBaseType   HU("HU");
    DenseHierMatrixBaseType HB("HB");
    {
      timer.reset();

      GraphHelperType S(AA);
      S.computeOrdering();

      CrsMatrixBaseType PA("Permuted AA");
      PA.copy(S.PermVector(), S.InvPermVector(), AA);

      UU.copy(Uplo::Upper, PA);

      CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
                                 S.NumBlocks(),
                                 S.RangeVector(),
                                 S.TreeVector());

      DenseMatrixHelper::flat2hier(BB, HB,
                                   S.NumBlocks(),
                                   S.RangeVector(),
                                   nb);

      t_reorder = timer.seconds();

      cout << "TriSolvePerformance:: Hier (dof, nnz) = " << HU.NumRows() << ", " << HU.NumNonZeros() << endl;

//.........这里部分代码省略.........
开发者ID:petsc,项目名称:Trilinos,代码行数:101,代码来源:example_tri_solve_performance.hpp

示例12: exampleDenseGemmByBlocks

  KOKKOS_INLINE_FUNCTION
  int exampleDenseGemmByBlocks(const OrdinalType mmin,
                               const OrdinalType mmax,
                               const OrdinalType minc,
                               const OrdinalType k,
                               const OrdinalType mb,
                               const int max_concurrency,
                               const int max_task_dependence,
                               const int team_size,
                               const int mkl_nthreads,
                               const bool check,
                               const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
      Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;

    typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;

    typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
    typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;

    typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;

    typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
    typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;
    double t = 0.0;

    cout << "DenseGemmByBlocks:: test matrices "
         <<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << " , mb = " << mb << endl;

    const size_t max_task_size = (3*sizeof(DenseTaskViewType)+196); // when 128 error
    //cout << "max task size = "<< max_task_size << endl;
    typename TaskFactoryType::policy_type policy(max_concurrency,
                                                 max_task_size,
                                                 max_task_dependence, 
                                                 team_size);
    
    TaskFactoryType::setMaxTaskDependence(max_task_dependence);
    TaskFactoryType::setPolicy(&policy);

    ostringstream os;
    os.precision(3);
    os << scientific;

    for (ordinal_type m=mmin;m<=mmax;m+=minc) {
      os.str("");

      DenseMatrixBaseType AA, BB, CC("CC", m, m), CB("CB", m, m);

      if (ArgTransA == Trans::NoTranspose) 
        AA = DenseMatrixBaseType("AA", m, k); 
      else 
        AA = DenseMatrixBaseType("AA", k, m);
      
      if (ArgTransB == Trans::NoTranspose) 
        BB = DenseMatrixBaseType("BB", k, m);
      else 
        BB = DenseMatrixBaseType("BB", m, k);
      
      for (ordinal_type j=0;j<AA.NumCols();++j)
        for (ordinal_type i=0;i<AA.NumRows();++i)
          AA.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
      
      for (ordinal_type j=0;j<BB.NumCols();++j)
        for (ordinal_type i=0;i<BB.NumRows();++i)
          BB.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
      
      for (ordinal_type j=0;j<CC.NumCols();++j)
        for (ordinal_type i=0;i<CC.NumRows();++i)
          CC.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
      CB.copy(CC);

      const double flop = get_flop_gemm<value_type>(m, m, k);

#ifdef HAVE_SHYLUTACHO_MKL
      mkl_set_num_threads(mkl_nthreads);
#endif

      os << "DenseGemmByBlocks:: m = " << m << " n = " << m << " k = " << k;
      if (check) {
        timer.reset();
        DenseTaskViewType A(&AA), B(&BB), C(&CB);
        Gemm<ArgTransA,ArgTransB,AlgoGemm::ExternalBlas>::invoke
          (TaskFactoryType::Policy(),
           TaskFactoryType::Policy().member_single(),
           1.0, A, B, 1.0, C);
        t = timer.seconds();
        os << ":: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs]  ";
      }

      {
        DenseHierMatrixBaseType HA, HB, HC;
        DenseMatrixHelper::flat2hier(AA, HA, mb, mb);
//.........这里部分代码省略.........
开发者ID:agrippa,项目名称:Trilinos,代码行数:101,代码来源:example_dense_gemm_by_blocks.hpp

示例13: exampleDenseMatrixBase

int exampleDenseMatrixBase(const ordinal_type mmin,
                           const ordinal_type mmax,
                           const ordinal_type minc,
                           const bool verbose) {
    typedef typename
    Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;

    const bool detail = false;
    std::cout << "DeviceSpace::  ";
    DeviceSpaceType::print_configuration(std::cout, detail);
    std::cout << "HostSpace::    ";
    HostSpaceType::print_configuration(std::cout, detail);
    std::cout << std::endl;

    typedef DenseMatrixBase<value_type,ordinal_type,size_type,HostSpaceType>   DenseMatrixBaseHostType;
    typedef DenseMatrixBase<value_type,ordinal_type,size_type,DeviceSpaceType> DenseMatrixBaseDeviceType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;

    std::cout << "DenseMatrixBase:: test matrices "
              <<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << std::endl;

    for (auto m=mmin; m<=mmax; m+=minc) {
        // random test matrix on host
        DenseMatrixBaseHostType TT("TT", m, m);
        for (ordinal_type j=0; j<TT.NumCols(); ++j) {
            for (ordinal_type i=0; i<TT.NumRows(); ++i)
                TT.Value(i,j) = 2.0*((value_type)std::rand()/(RAND_MAX)) - 1.0;
            TT.Value(j,j) = std::fabs(TT.Value(j,j));
        }
        if (verbose)
            TT.showMe(std::cout) << std::endl;

        DenseMatrixBaseDeviceType AA("AA");

        timer.reset();
        AA.mirror(TT);
        double t_mirror = timer.seconds();

        DenseMatrixBaseDeviceType BB("BB");
        BB.createConfTo(AA);

        timer.reset();
        DenseMatrixTools::copy(BB, AA);
        double t_copy = timer.seconds();

        // check
        DenseMatrixBaseHostType RR("RR");
        RR.createConfTo(BB);
        RR.mirror(BB);
        if (verbose)
            RR.showMe(std::cout) << std::endl;

        double err = 0.0;
        for (ordinal_type j=0; j<TT.NumCols(); ++j)
            for (ordinal_type i=0; i<TT.NumRows(); ++i)
                err += std::fabs(TT.Value(i,j) - RR.Value(i,j));

        {
            const auto prec = std::cout.precision();
            std::cout.precision(4);

            std::cout << std::scientific
                      << "DenseMatrixBase:: dimension = " << m << " x " << m << ", "
                      << "Mirroring to device  = " << t_mirror << " [sec], "
                      << "Elementwise copy on device = " << t_copy << " [sec], "
                      << "Error = " << err
                      << std::endl;

            std::cout.unsetf(std::ios::scientific);
            std::cout.precision(prec);
        }
    }

    return r_val;
}
开发者ID:uppatispr,项目名称:trilinos-official,代码行数:78,代码来源:Tacho_ExampleDenseMatrixBase.hpp

示例14: exampleICholUnblocked

  // Reads a matrix from a Matrix Market file, copies its upper part into UU
  // and runs the unblocked incomplete Cholesky task on it, printing the time
  // of the import step and of the factorization step.
  //
  // file_input          - path of the Matrix Market file to import.
  // max_task_dependence - forwarded to the task policy and the task factory.
  // team_size           - forwarded to the task policy unless
  //                       __USE_FIXED_TEAM_SIZE__ is defined.
  // verbose             - when true, print UU after the import and after the
  //                       factorization.
  //
  // Returns 0 on success and 1 when the input file cannot be opened.
  KOKKOS_INLINE_FUNCTION
  int exampleICholUnblocked(const string file_input,
                            const int max_task_dependence,
                            const int team_size,
                            const bool verbose) {
    using value_type   = ValueType;
    using ordinal_type = OrdinalType;
    using size_type    = SizeType;

    using CrsMatrixBaseType =
      CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits>;
    using CrsMatrixViewType = CrsMatrixView<CrsMatrixBaseType>;

    using TaskFactoryType =
      TaskTeamFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
                      Kokkos::Experimental::Future<int,SpaceType>,
                      Kokkos::Impl::TeamThreadRangeBoundariesStruct>;

    using ForType = ParallelFor;

    using CrsTaskViewType = TaskView<CrsMatrixViewType,TaskFactoryType>;

    int r_val = 0;

    Kokkos::Impl::Timer stopwatch;
    double elapsed = 0.0;

    // ---- Step 1: import the matrix and keep its upper part in UU ----
    cout << "ICholUnblocked:: import input file = " << file_input << endl;
    CrsMatrixBaseType AA("AA"), UU("UU");
    {
      stopwatch.reset();

      ifstream input(file_input);
      if (!input.good()) {
        cout << "Failed in open the file: " << file_input << endl;
        return r_val + 1;
      }
      AA.importMatrixMarket(input);

      UU.copy(Uplo::Upper, AA);

      // Stop the clock before any verbose printing.
      elapsed = stopwatch.seconds();

      if (verbose) {
        cout << UU << endl;
      }
    }
    cout << "ICholUnblocked:: import input file::time = " << elapsed << endl;

    // ---- Step 2: set up the task policy shared through the factory ----
#ifdef __USE_FIXED_TEAM_SIZE__
    typename TaskFactoryType::policy_type task_policy(max_task_dependence);
#else
    typename TaskFactoryType::policy_type task_policy(max_task_dependence, team_size);
#endif
    TaskFactoryType::setMaxTaskDependence(max_task_dependence);
    TaskFactoryType::setPolicy(&task_policy);

    // ---- Step 3: factorize UU in place through a task view ----
    cout << "ICholUnblocked:: factorize the matrix" << endl;
    CrsTaskViewType U(&UU);
    U.fillRowViewArray();
    {
      stopwatch.reset();

      // Create the factorization task, spawn it and wait for the policy.
      auto root_task = TaskFactoryType::Policy().create_team(
        IChol<Uplo::Upper,AlgoIChol::UnblockedOpt1>
        ::TaskFunctor<ForType,CrsTaskViewType>(U), 0);
      TaskFactoryType::Policy().spawn(root_task);
      Kokkos::Experimental::wait(TaskFactoryType::Policy());

      elapsed = stopwatch.seconds();

      if (verbose) {
        cout << UU << endl;
      }
    }
    cout << "ICholUnblocked:: factorize the matrix::time = " << elapsed << endl;

    return r_val;
  }
开发者ID:abhishek4747,项目名称:trilinos,代码行数:75,代码来源:example_ichol_unblocked.hpp

示例15: color

	// Colors the graph with repeated speculative passes followed by conflict
	// detection.
	//
	// Each pass calls colorGreedy() and then findConflicts(), whose return value
	// is taken as the number of vertices that still need a color. At most 20
	// passes are run; if vertices remain afterwards, resolveConflicts() is called.
	//
	// useConflictList          - when true, sets _conflictType to CONFLICT_LIST.
	// serialConflictResolution - when true, stop after the first pass and leave
	//                            the remaining vertices to resolveConflicts().
	// ticToc                   - when true, print the time of each phase as
	//                            measured by Kokkos::Impl::Timer.
	void color(bool useConflictList, bool serialConflictResolution, bool ticToc){
		Ordinal numUncolored = _size; // on host
		double t = 0.0, total = 0.0;
		Kokkos::Impl::Timer timer;

		if(useConflictList)
			_conflictType = CONFLICT_LIST;

		// While vertices to color, do speculative coloring.
		int iter = 0;
		for(iter = 0; (iter<20) && (numUncolored>0); iter++){
			std::cout<< "Start iteration " << iter << std::endl;

			// First color greedy speculatively, some conflicts expected
			this -> colorGreedy();
			// Fence before reading the timer so the phase has finished.
			ExecSpace::fence();
			if(ticToc){
				t = timer.seconds();
				total += t;
				// Fixed: the elapsed time was accumulated but never printed here.
				std::cout << "Time speculative greedy phase " << iter << " : " << t << std::endl;
				timer.reset();
			}

#ifdef DEBUG
			// UVM required - will be slow!
			printf("\n 100 first vertices: ");
			for(int i = 0; i < 100; i++){
				printf(" %i", _colors[i]);
			}
			printf("\n");
#endif

			// Check for conflicts (parallel), find vertices to recolor
			numUncolored = this -> findConflicts();

			ExecSpace::fence();
			if(ticToc){
				t = timer.seconds();
				total += t;
				std::cout << "Time conflict detection " << iter << " : " << t << std::endl;
				timer.reset();
			}
			if (serialConflictResolution) break; // Break after first iteration

			// Swap the work list and the recolor list for the next pass. The
			// lengths are updated through the host_* handles and then copied
			// into _vertexListLength / _recolorListLength.
			if(_conflictType == CONFLICT_LIST){
				array_type temp = _vertexList;
				_vertexList = _recolorList;
				host_vertexListLength() = host_recolorListLength();
				_recolorList = temp;
				host_recolorListLength() = 0;
				Kokkos::deep_copy(_vertexListLength, host_vertexListLength);
				Kokkos::deep_copy(_recolorListLength, host_recolorListLength);
			}
		}

		std::cout << "Number of coloring iterations: " << iter << std::endl;

		if(numUncolored > 0){
			// Resolve conflicts by recolor in serial
			this -> resolveConflicts();
			ExecSpace::fence();
			if(ticToc){
				t = timer.seconds();
				total += t;
				std::cout << "Time conflict resolution: " << t << std::endl;
				// NOTE(review): the total is only reported on this path; when the
				// loop above colors everything, "Total time" is never printed.
				// Confirm whether that is intended before moving it.
				std::cout << "Total time: " << total << std::endl;
			}
		}
	}
开发者ID:zabookey,项目名称:KHPCG3.0,代码行数:75,代码来源:Coloring.cpp


注:本文中的kokkos::impl::Timer::reset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。