C++ OptionParser::getOptionInt方法代码示例

本文整理汇总了C++中OptionParser::getOptionInt方法的典型用法代码示例。如果您正苦于以下问题：C++ OptionParser::getOptionInt方法的具体用法？C++ OptionParser::getOptionInt怎么用？C++ OptionParser::getOptionInt使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类OptionParser的用法示例。

在下文中一共展示了OptionParser::getOptionInt方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: InvalidArgValue

// Validate the option values that do not depend on the stencil
// implementation.  Throws InvalidArgValue for the first option that
// fails its check and returns normally when every check passes.
void
CheckOptions( const OptionParser& opts )
{
    // overall problem size: exactly two entries, neither one negative
    // (a zero entry is let through by this check)
    const std::vector<long long> dims = opts.getOptionVecInt( "customSize" );
    if( dims.size() != 2 )
    {
        throw InvalidArgValue( "overall size must have two dimensions" );
    }
    const bool anyNegativeDim = (dims[0] < 0) || (dims[1] < 0);
    if( anyNegativeDim )
    {
        throw InvalidArgValue( "each size dimension must be positive" );
    }

    // validation error threshold: zero and negative values are rejected
    const float threshold = opts.getOptionFloat( "val-threshold" );
    if( threshold <= 0.0f )
    {
        throw InvalidArgValue( "validation threshold must be positive" );
    }

    // the two remaining counts may be zero, but not negative
    const int printLimit = opts.getOptionInt( "val-print-limit" );
    if( printLimit < 0 )
    {
        throw InvalidArgValue( "number of validation errors to print must be non-negative" );
    }

    const int warmupCount = opts.getOptionInt( "warmupPasses" );
    if( warmupCount < 0 )
    {
        throw InvalidArgValue( "number of warmup passes must be non-negative" );
    }
}

开发者ID:adityaatluri，项目名称:shoc-1，代码行数:35，代码来源:Stencil2Dmain.cpp

示例2: RunBenchmark

// ****************************************************************************
// Function: RunBenchmark
//
// Purpose:
//   Measures the floating point capability of the device for a variety of
//   combinations of arithmetic operations, once in single precision and
//   once in double precision.
//
// Arguments:
//   op: the options parser / parameter database
//   resultDB: handed on to both RunTest instantiations
//
// Returns:  nothing
//
// Programmer: Zhi Ying([email protected])
//             Jun Jin([email protected])
//
// Creation: May 23, 2011
//
// Modifications:
// 12/12/12 - Kyle Spafford - Code style and minor integration updates
//
// ****************************************************************************
void RunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    // Options read up front: "quiet" suppresses the progress bar.
    const bool beVerbose = op.getOptionBool("verbose");
    const bool beQuiet   = op.getOptionBool("quiet");
    const unsigned int nPasses = op.getOptionInt("passes");
    const int targetDev = op.getOptionInt("target");

    double repeatFactor = 3;
    cout << "Adjust repeat factor = " << repeatFactor << "\n";

    // The progress bar is sized for 16 steps per pass, for each of the
    // two precisions run below.
    int progressSteps = 16*nPasses*2;
    ProgressBar pb(progressSteps);
    if (!(beVerbose || beQuiet))
    {
        pb.Show(stdout);
    }

    // Single precision first, then double precision.
    RunTest<float>(resultDB, nPasses, beVerbose, beQuiet,
                   repeatFactor, pb, "-SP", targetDev);
    RunTest<double>(resultDB, nPasses, beVerbose, beQuiet,
                    repeatFactor, pb, "-DP", targetDev);

    if (!beVerbose)
    {
        cout << endl;
    }
}

开发者ID:optimus-prime，项目名称:shoc-mic，代码行数:47，代码来源:MaxFlops.cpp

示例3: GPUSetup

// ****************************************************************************
// Function: GPUSetup
//
// Purpose:
//  do the necessary OpenCL setup for GPU part of the test
//
// Arguments:
//   op: the options parser / parameter database
//   mympirank: for printing errors in case of failure
//   mynoderank: this is typically the device ID (the mapping done in main)
//
// Returns: 0 on success (including the "infoDevices" listing path),
//          -1 when the "device" option yields an empty list
//
// Creation: 2009
//
// Modifications:
//
// ****************************************************************************
//
int GPUSetup(OptionParser &op, int mympirank, int mynoderank)
{
    addBenchmarkSpecOptions(op);
    
    // When asked to list devices, print them and do no further setup.
    if (op.getOptionBool("infoDevices"))
    {
        OpenCLNodePlatformContainer ndc1;
        ndc1.Print (cout);
        return (0);
    }
    
    int platform = op.getOptionInt("platform");

    // The device option supports specifying more than one device.
    // Fetch the list once instead of re-querying the parser per use.
    const std::vector<long long> devices = op.getOptionVecInt("device");
    if( devices.empty() )
    {
        // Without this guard the fallback to the "first device" below
        // would index an empty vector.
        std::cerr << "Error: no devices specified with --device flag for task "
            << mympirank
            << " ( node rank " << mynoderank
            << ")" << std::endl;
        return -1;
    }

    // Use the entry matching this task's node rank when there is one.
    // A negative rank is handled explicitly rather than through an
    // implicit signed-to-unsigned conversion; as before, it falls back
    // to the first device with a warning.
    std::vector<long long>::size_type deviceIdx = 0;
    const bool rankHasOwnDevice =
        (mynoderank >= 0) &&
        (static_cast<std::vector<long long>::size_type>(mynoderank) < devices.size());
    if( rankHasOwnDevice )
    {
        deviceIdx = static_cast<std::vector<long long>::size_type>(mynoderank);
    }
    else
    {
        std::cerr << "Warning: not enough devices specified with --device flag for task "
            << mympirank
            << " ( node rank " << mynoderank
            << ") to claim its own device; forcing to use first device "
            << std::endl;
    }
    int device = static_cast<int>(devices[deviceIdx]);

    // Initialization: the device, context, queue and a copy of the
    // options are stored in the _mpicontention_* variables.
    _mpicontention_ocldev = new cl::Device( ListDevicesAndGetDevice(platform, device) );
    std::vector<cl::Device> ctxDevices;
    ctxDevices.push_back( *_mpicontention_ocldev );
    _mpicontention_ocldriver_ctx   = new cl::Context( ctxDevices );
    _mpicontention_ocldriver_queue = new cl::CommandQueue( *_mpicontention_ocldriver_ctx, *_mpicontention_ocldev, CL_QUEUE_PROFILING_ENABLE );
    _mpicontention_gpuop = op;
    return 0;
}

开发者ID:Poojachitral，项目名称:shoc，代码行数:54，代码来源:OCLDriver.cpp

示例4: cudaGetDevice

// Runs the single precision FFT test and, when the device version allows
// it, the double precision test.  Otherwise FLT_MAX placeholder results
// are recorded for every double precision entry.
void
RunBenchmark(ResultDatabase &resultDB, OptionParser &op)
{
    // Look up the properties of the current device; double precision is
    // taken as available for version 1.3 and above.
    cudaGetDevice(&fftDevice);
    cudaDeviceProp props;
    cudaGetDeviceProperties(&props, fftDevice);
    const bool isOnePointThreePlus = (props.major == 1 && props.minor >= 3);
    const bool supportsDouble = isOnePointThreePlus || (props.major >= 2);

    cout << "Running single precision test" << endl;
    runTest<float2>("SP-FFT", resultDB, op);

    if (supportsDouble)
    {
        cout << "Running double precision test" << endl;
        runTest<double2>("DP-FFT", resultDB, op);
        return;
    }

    cout << "Skipping double precision test" << endl;
    char notSupported[32] = "DP_Not_Supported";
    // resultDB requires neg entry for every possible result,
    // so add one set of placeholders per pass
    const int nPasses = op.getOptionInt("passes");
    for (int pass = 0; pass < nPasses; ++pass)
    {
        resultDB.AddResult("DP-FFT" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("DP-FFT_PCIe" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("DP-FFT_Parity" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("DP-FFT-INV" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("DP-FFT-INV_PCIe" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("DP-FFT-INV_Parity" , notSupported, "GB/s", FLT_MAX);
    }
}

开发者ID:vetter，项目名称:shoc，代码行数:33，代码来源:FFT.cpp

示例5: dump2D

// Runs one forward 2D FFT on random input and, when PRINT_RESULT is
// defined, prints every element of the result.
//
// The "pts1" and "pts2" options are 1-based selectors into the table of
// supported sizes; an out-of-range selector prints an error and exits.
void dump2D(OptionParser& op)
{
	void* work;
	void* temp;
	T2* source;
	T2* result;

	const int probSizes[7] = { 128, 256, 512, 1024, 2048, 4096, 8192};
	const int sizeIndex = op.getOptionInt("pts1")-1;
	const int sizeIndey = op.getOptionInt("pts2")-1;
	const bool badIndex = (sizeIndex < 0 || sizeIndex >= 7);
	const bool badIndey = (sizeIndey < 0 || sizeIndey >= 7);
	if (badIndex || badIndey) {
		cerr << "Invalid size index specified\n";
		exit(-1);
	}

	int FFTN1 = probSizes[sizeIndex];
	int FFTN2 = probSizes[sizeIndey];
	// Largest table entry is 8192, so the product (at most 2^26) fits in int.
	// Keeping the element count integral avoids comparing the loop index
	// against a double.
	const int n_elems = FFTN1*FFTN2;
	unsigned long used_bytes = n_elems*sizeof(T2);

	bool do_dp = dp<T2>();
	init2(op, do_dp, FFTN1, FFTN2);

	int n_ffts = 1;

	// allocate host memory for the input and the result
	allocHostBuffer((void**)&source, used_bytes);
	allocHostBuffer((void**)&result, used_bytes);

	// fill the input with values computed as (rand()/RAND_MAX)*2-1
	for (int i = 0; i < n_elems; i++) {
		source[i].x = (rand()/static_cast<float>(RAND_MAX))*2-1;
		source[i].y = (rand()/static_cast<float>(RAND_MAX))*2-1;
	}

	// alloc device memory
	allocDeviceBuffer(&work, used_bytes);
	allocDeviceBuffer(&temp, used_bytes);

	// upload, transform, download
	copyToDevice(work, source, used_bytes);
	forward2(work, temp, n_ffts, FFTN1, FFTN2);
	copyFromDevice(result, work, used_bytes);

#ifdef PRINT_RESULT
	for (int i = 0; i < n_elems; i++) {
		fprintf(stdout, "data[%d] (%g, %g) \n",i, result[i].x, result[i].y);
	}
#endif
	freeDeviceBuffer(work);
	freeDeviceBuffer(temp);
	freeHostBuffer(source);
	freeHostBuffer(result);
}

开发者ID:Sable，项目名称:Ostrich，代码行数:59，代码来源:fft.cpp

示例6: if

// Runs the TPScan test in single precision and, when the device reports
// one of the two double precision extensions, in double precision too.
// Otherwise FLT_MAX placeholder results are recorded for the DP entries.
void
RunBenchmark(cl::Device& devcpp,
             cl::Context& ctxcpp,
             cl::CommandQueue& queuecpp,
             ResultDatabase &resultDB,
             OptionParser &op)
{
    // runTest is called with the C handles, so unwrap the C++ objects
    // TODO propagate use of C++ bindings
    cl_device_id dev = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    // Basic MPI information; the rank is used in the warning below
    int worldSize, myRank;
    MPI_Comm_size(MPI_COMM_WORLD, &worldSize);
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    // The single precision test always runs.
    // OpenCL doesn't support templated kernels, so we have to use macros
    string spMacros = "-DSINGLE_PRECISION";
    runTest<float>
        ("TPScan-SP", dev, ctx, queue, resultDB, op, spMacros);

    // Pick the macro for whichever fp64 extension is present; the AMD
    // extension is only queried when the Khronos one is missing.
    string dpMacros;
    if (checkExtension(dev, "cl_khr_fp64"))
    {
        dpMacros = "-DK_DOUBLE_PRECISION ";
    }
    else if (checkExtension(dev, "cl_amd_fp64"))
    {
        dpMacros = "-DAMD_DOUBLE_PRECISION ";
    }

    if (!dpMacros.empty())
    {
        cout << "DP Supported\n";
        runTest<double>
            ("TPScan-DP", dev, ctx, queue, resultDB, op, dpMacros);
        return;
    }

    char notSupported[1024] = "DP_Not_Supported";
    cout << "Warning, rank " << myRank << "'s device does not support DP\n";
    // ResultDB requires every rank to report something. If this rank
    // doesn't support DP, submit FLT_MAX (this is handled as no result by
    // ResultDB).
    const int nPasses = op.getOptionInt("passes");
    for (int pass = 0; pass < nPasses; ++pass)
    {
        resultDB.AddResult("TPScan-DP-Kernel" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("TPScan-DP-Kernel+PCIe" , notSupported, "GB/s",
            FLT_MAX);
        resultDB.AddResult("TPScan-DP-MPI_ExScan" , notSupported, "GB/s",
            FLT_MAX);
        resultDB.AddResult("TPScan-DP-Overall" , notSupported, "GB/s", FLT_MAX);
    }
}

开发者ID:dylanzika，项目名称:shoc，代码行数:58，代码来源:tpScan.cpp

示例7: dump1D

// Runs one forward 1D FFT on random input and, when PRINT_RESULT is
// defined, prints every element of the result.
//
// The "pts" option is a 1-based selector into the table of supported
// sizes; an out-of-range selector prints an error and exits.
void dump1D(OptionParser& op)
{
	void* work;
	void* temp;
	T2* source;
	T2* result;

	const int probSizes[7] = { 128, 256, 512, 1024, 2048, 4096, 8192 };
	const int sizeIndex = op.getOptionInt("pts")-1;
	if (sizeIndex < 0 || sizeIndex >= 7) {
		cerr << "Invalid size index specified\n";
		exit(-1);
	}
	int fftn = probSizes[sizeIndex];

	// Buffer size in bytes: one T2 per point
	unsigned long used_bytes = fftn * sizeof(T2);

	bool do_dp = dp<T2>();
	init(op, do_dp, fftn);

	// a single transform is run
	int n_ffts = 1;

	// N is printed as a floating point value, as before
	fprintf(stdout, "used_bytes=%lu, N=%g\n", used_bytes, static_cast<double>(fftn));

	// allocate host memory for the input and the result
	allocHostBuffer((void**)&source, used_bytes);
	allocHostBuffer((void**)&result, used_bytes);

	// fill the input with values computed as (rand()/RAND_MAX)*2-1;
	// the loop bound is the integer point count rather than a double
	for (int i = 0; i < fftn; i++) {
		source[i].x = (rand()/static_cast<float>(RAND_MAX))*2-1;
		source[i].y = (rand()/static_cast<float>(RAND_MAX))*2-1;
	}

	// alloc device memory
	allocDeviceBuffer(&work, used_bytes);
	allocDeviceBuffer(&temp, used_bytes);

	// upload, transform, download
	copyToDevice(work, source, used_bytes);
	forward(work, temp, n_ffts, fftn);
	copyFromDevice(result, work, used_bytes);

#ifdef PRINT_RESULT
	for (int i = 0; i < fftn; i++) {
		fprintf(stdout, "data[%d] (%g, %g)\n", i, result[i].x, result[i].y);
	}
#endif
	freeDeviceBuffer(work);
	freeDeviceBuffer(temp);
	freeHostBuffer(source);
	freeHostBuffer(result);
}

开发者ID:Sable，项目名称:Ostrich，代码行数:58，代码来源:fft.cpp

示例8: defined

// Runs the 2D stencil test in single precision and then in double
// precision.  In PARALLEL builds only MPI rank 0 prints the progress
// messages; in serial builds they are always printed.
void
RunBenchmark(OptionParser& opts, ResultDatabase& resultDB )
{
    // Decide once whether this process prints progress messages, instead
    // of repeating the preprocessor guard around every message.
#if defined(PARALLEL)
    int cwrank;
    MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
    const bool printsProgress = (cwrank == 0);
#else
    const bool printsProgress = true;
#endif // defined(PARALLEL)

    if( printsProgress )
    {
        std::cout << "Running single precision test" << std::endl;
    }
    //omp_set_num_threads(124);
    DoTest<float>( "SP_Sten2D", resultDB, opts );

    // check if we can run double precision tests
    // No device query is made here: the condition is hard-wired to true
    // (a device-version test was left commented out in its place), so the
    // fallback branch below does not currently run.
    const bool dpSupported = true;
    if( dpSupported )
    {
        if( printsProgress )
        {
            std::cout << "DP supported\n" << std::endl;
        }
        //omp_set_num_threads(93);
        DoTest<double>( "DP_Sten2D", resultDB, opts );
    }
    else
    {
        if( printsProgress )
        {
            std::cout << "Double precision not supported - skipping" << std::endl;
        }
        // resultDB requires neg entry for every possible result
        const int nPasses = static_cast<int>( opts.getOptionInt( "passes" ) );
        for( int p = 0; p < nPasses; p++ )
        {
            resultDB.AddResult( "DP_Sten2D", "N/A", "s", FLT_MAX);
        }
    }
}

开发者ID:optimus-prime，项目名称:shoc-mic，代码行数:55，代码来源:Stencil2Dmain.cpp

示例9: InvalidArgValue

// Checks the options specific to the MPI CUDA stencil after the base
// class has checked its own.
//
// Throws InvalidArgValue when an "lsize" entry is not positive, or when
// ((size + 2*halo) - 2) is not a multiple of the matching "lsize" entry
// for rows or columns.
void
MPICUDAStencilFactory<T>::CheckOptions( const OptionParser& opts ) const
{
    // let base class check its options first
    CommonCUDAStencilFactory<T>::CheckOptions( opts );

    // check our options
    std::vector<long long> shDims = opts.getOptionVecInt( "lsize" );
    std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );

    // the first entry is read below, so make sure there is one
    assert( !arrayDims.empty() );
    if( arrayDims[0] == 0 )
    {
        // custom size was not specified - we are using a standard size
        int sizeClass = opts.getOptionInt("size");
        arrayDims = StencilFactory<T>::GetStandardProblemSize( sizeClass );
    }
    assert( shDims.size() == 2 );
    assert( arrayDims.size() == 2 );

    // the lsize entries are used as divisors below; reject zero (which
    // would be a modulo by zero) and negative values (which would wrap
    // when converted to size_t)
    if( (shDims[0] <= 0) || (shDims[1] <= 0) )
    {
        throw InvalidArgValue( "each lsize dimension must be positive" );
    }

    size_t gRows = static_cast<size_t>( arrayDims[0] );
    size_t gCols = static_cast<size_t>( arrayDims[1] );
    size_t lRows = static_cast<size_t>( shDims[0] );
    size_t lCols = static_cast<size_t>( shDims[1] );

    unsigned int haloWidth = static_cast<unsigned int>( opts.getOptionInt( "iters-per-exchange" ) );

    // verify that MPI halo width will result in a matrix being passed
    // to the kernel that also has its global size as a multiple of
    // the local work size
    //
    // Because the MPI halo width is arbitrary, and the kernel halo width
    // is always 1, we have to ensure that:
    //   ((size + 2*halo) - 2) % lsize == 0
    if( (((gRows + 2*haloWidth) - 2) % lRows) != 0 )
    {
        throw InvalidArgValue( "rows including halo must be even multiple of lsize (e.g., lsize rows evenly divides ((rows + 2*halo) - 2) )" );
    }
    if( (((gCols + 2*haloWidth) - 2) % lCols) != 0 )
    {
        throw InvalidArgValue( "columns including halo must be even multiple of lsize (e.g., lsize cols evenly divides ((cols + 2*halo) - 2) )" );
    }
}

开发者ID:BenjaminCoquelle，项目名称:shoc，代码行数:41，代码来源:MPICUDAStencilFactory.cpp

示例10: InvalidArgValue

// Checks the options common to all stencil factories.
//
// Throws InvalidArgValue when "num-iters" is zero or negative.
void 
StencilFactory<T>::CheckOptions( const OptionParser& options ) const
{
    // number of iterations must be positive
    // The value is tested while still signed: converting it to unsigned
    // first would turn a negative count into a large positive one and
    // let it slip past the check.
    const long long nIters = options.getOptionInt( "num-iters" );
    if( nIters <= 0 )
    {
        throw InvalidArgValue( "number of iterations must be positive" );
    }

    // no restrictions on weight values, just that we have them
}

开发者ID:ellen-hl，项目名称:shoc-mic，代码行数:12，代码来源:StencilFactory.cpp

示例11:

// Records FLT_MAX placeholder results under `name` and its five suffixed
// variants, once per pass, each tagged with `reason`.
static void
fillResultDB(const string& name, const string& reason, OptionParser &op, 
             ResultDatabase& resultDB)
{
    // Suffixes appended to `name`, in the order the entries are added.
    static const char* const kSuffixes[] = {
        "", "_PCIe", "_Parity", "-INV", "-INV_PCIe", "-INV_Parity"
    };
    const int nSuffixes = sizeof(kSuffixes) / sizeof(kSuffixes[0]);

    // resultDB requires neg entry for every possible result
    const int nPasses = op.getOptionInt("passes");
    for (int pass = 0; pass < nPasses; ++pass) {
        for (int s = 0; s < nSuffixes; ++s) {
            resultDB.AddResult(name + kSuffixes[s], reason, "GB/s", FLT_MAX);
        }
    }
}

开发者ID:Poojachitral，项目名称:shoc，代码行数:15，代码来源:FFT.cpp

示例12: if

// Runs the SGEMM test and, when the device reports one of the two double
// precision extensions, the DGEMM test.  Otherwise FLT_MAX placeholder
// results are recorded for the DGEMM entries.
void
RunBenchmark(cl::Device& devcpp,
                  cl::Context& ctxcpp,
                  cl::CommandQueue& queuecpp,
                  ResultDatabase &resultDB,
                  OptionParser &op)
{
    // runTest is called with the C handles, so unwrap the C++ objects
    // TODO propagate use of C++ bindings
    cl_device_id dev = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    // The single precision test always runs.
    // OpenCL doesn't support templated kernels, so we have to use macros
    runTest<float>("SGEMM", dev, ctx, queue, resultDB, op,
            "-DSINGLE_PRECISION");

    // Double precision: the Khronos extension is tried first, the AMD
    // one only when that is missing.
    if (checkExtension(dev, "cl_khr_fp64"))
    {
        cout << "DP Supported\n";
        runTest<double>("DGEMM", dev, ctx, queue, resultDB, op,
                "-DK_DOUBLE_PRECISION ");
    }
    else if (checkExtension(dev, "cl_amd_fp64"))
    {
        cout << "DP Supported\n";
        runTest<double>("DGEMM", dev, ctx, queue, resultDB, op,
                "-DAMD_DOUBLE_PRECISION ");
    }
    else
    {
        cout << "DP Not Supported\n";
        char notSupported[1024] = "DP_Not_Supported";
        // resultDB requires neg entry for every possible result:
        // one set per pass for each of the 'N' and 'T' variants
        const char variantTags[2] = { 'N', 'T' };
        const int nPasses = op.getOptionInt("passes");
        for (int pass = 0; pass < nPasses; ++pass) {
            for (int v = 0; v < 2; ++v) {
                const string label = string("DGEMM") + "-" + variantTags[v];
                resultDB.AddResult(label, notSupported, "GFlops", FLT_MAX);
                resultDB.AddResult(label + "_PCIe", notSupported, "GFlops", FLT_MAX);
                resultDB.AddResult(label + "_Parity", notSupported, "N", FLT_MAX);
            }
        }
    }
}

开发者ID:ashwinma，项目名称:mpiacc-contention-tests，代码行数:48，代码来源:SGEMM.cpp

示例13: if

// Runs the S3D test in single precision and, when the device reports one
// of the two double precision extensions, in double precision too.
// Otherwise FLT_MAX placeholder results are recorded for the DP entries.
void
RunBenchmark(cl::Device& devcpp, cl::Context& ctxcpp,
             cl::CommandQueue& queuecpp,
             ResultDatabase &resultDB, OptionParser &op)
{
    // RunTest is called with the C handles, so unwrap the C++ objects
    // TODO propagate use of C++ bindings
    cl_device_id dev = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    // The single precision test always runs.
    // OpenCL doesn't support templated kernels, so we have to use macros
    string spMacros = "-DSINGLE_PRECISION ";
    RunTest<float>("S3D-SP", dev, ctx, queue, resultDB, op, spMacros);

    // Pick the macro for whichever fp64 extension is present; the AMD
    // extension is only queried when the Khronos one is missing.
    string dpMacros;
    if (checkExtension(dev, "cl_khr_fp64"))
    {
        dpMacros = "-DK_DOUBLE_PRECISION ";
    }
    else if (checkExtension(dev, "cl_amd_fp64"))
    {
        dpMacros = "-DAMD_DOUBLE_PRECISION ";
    }

    if (!dpMacros.empty())
    {
        cout << "DP Supported\n";
        RunTest<double>
            ("S3D-DP", dev, ctx, queue, resultDB, op, dpMacros);
        return;
    }

    cout << "DP Not Supported\n";
    char notSupported[1024] = "DP_Not_Supported";
    // resultDB requires neg entry for every possible result
    const int nPasses = op.getOptionInt("passes");
    for (int pass = 0; pass < nPasses; ++pass) {
        resultDB.AddResult("S3D-DP" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("S3D-DP_PCIe" , notSupported, "GB/s", FLT_MAX);
        resultDB.AddResult("S3D-DP_Parity" , notSupported, "GB/s", FLT_MAX);
    }
}

开发者ID:dylanzika，项目名称:shoc，代码行数:44，代码来源:S3D.cpp

示例14: if

// Runs the MD-LJ test in single precision and, when the device reports
// one of the two double precision extensions, in double precision too.
// Otherwise FLT_MAX placeholder results are recorded for the DP entries.
void
RunBenchmark(cl_device_id dev,
                  cl_context ctx,
                  cl_command_queue queue,
                  ResultDatabase &resultDB,
                  OptionParser &op)
{
    // The single precision test always runs.
    // OpenCL doesn't support templated kernels, so we have to use macros
    string spMacros = "-DSINGLE_PRECISION";
    runTest<float, float4, float4>
        ("MD-LJ", dev, ctx, queue, resultDB, op, spMacros);

    // Query the Khronos extension first; the AMD one is only queried
    // when the Khronos one is missing (short-circuit on !hasKhrFp64).
    const bool hasKhrFp64 = checkExtension(dev, "cl_khr_fp64");
    const bool hasAmdFp64 = !hasKhrFp64 && checkExtension(dev, "cl_amd_fp64");

    if (hasKhrFp64 || hasAmdFp64)
    {
        cout << "DP Supported\n";
        string dpMacros = hasKhrFp64 ? "-DK_DOUBLE_PRECISION "
                                     : "-DAMD_DOUBLE_PRECISION ";
        runTest<double, double4, double4>
            ("MD-LJ-DP", dev, ctx, queue, resultDB, op, dpMacros);
    }
    else
    {
        cout << "DP Not Supported\n";
        char notSupported[32] = "DP_Not_Supported";
        // resultDB requires neg entry for every possible result
        const int nPasses = op.getOptionInt("passes");
        for (int pass = 0; pass < nPasses; ++pass) {
            resultDB.AddResult("MD-LJ-DP" , notSupported, "GB/s", FLT_MAX);
            resultDB.AddResult("MD-LJ-DP_PCIe" , notSupported, "GB/s", FLT_MAX);
            resultDB.AddResult("MD-LJ-DP-Bandwidth", notSupported, "GB/s", FLT_MAX);
            resultDB.AddResult("MD-LJ-DP-Bandwidth_PCIe", notSupported, "GB/s", FLT_MAX);
            resultDB.AddResult("MD-LJ-DP_Parity" , notSupported, "GB/s", FLT_MAX);
        }
    }
}

开发者ID:BenjaminCoquelle，项目名称:shoc，代码行数:43，代码来源:MD.cpp

示例15: RunBenchmark

// ****************************************************************************
// Function: RunBenchmark
//
// Purpose: 
//   Runs the stability test. The algorithm for the parallel
//   version of the test, which enables testing of an entire GPU
//   cluster at the same time, is as follows. Each participating node
//   first allocates its data, while node zero additionally determines
//   start and finish times based on a user input parameter. All nodes
//   then enter the outermost loop, copying fresh data from the CPU
//   before entering the core of the test. In the core, each node
//   performs a loop consisting of the forward kernel, a potential
//   check, and then the inverse kernel. After performing a configurable
//   number of forward/inverse iterations, along with a configurable
//   number of checks, each node sends the number of failures it
//   encountered to node zero. Node zero collects and reports the error
//   counts, determines whether the test has run its course, and
//   broadcasts the decision. If the decision is to proceed, each node
//   begins the next iteration of the outer loop, copying fresh data and
//   then performing the kernels and checks of the core loop.
//
// Arguments:
//   resultDB: the benchmark stores its results in this ResultDatabase
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
//
// Modifications:
//
// ****************************************************************************
void RunBenchmark(ResultDatabase &resultDB, OptionParser& op)
{	
    int mpi_rank, mpi_size, node_rank;
    int i, j;
    float2* source, * result;
    void* work, * chk;

#ifdef PARALLEL
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    NodeInfo NI;
    node_rank = NI.nodeRank();

    cout << "MPI Task " << mpi_rank << " of " << mpi_size 
         << " (noderank=" << node_rank << ") starting....\n";
#else
    mpi_rank = 0;
    mpi_size = 1;
    node_rank = 0;
#endif
   
    // ensure chk buffer alloc succeeds before grabbing the
    // rest of available memory.
    allocDeviceBuffer(&chk, 1);
    unsigned long avail_bytes = findAvailBytes();
    // unsigned long avail_bytes = 1024*1024*1024-1;
    
    // now determine how much available memory will be used (subject
    // to CUDA's constraint on the maximum block dimension size)
    int blocks = avail_bytes / (512*sizeof(float2));
    int slices = 1;
    while (blocks/slices > 65535) {
        slices *= 2;
    }
    int half_n_ffts = ((blocks/slices)*slices)/2;
    int n_ffts = half_n_ffts * 2;
    fprintf(stderr, "avail_bytes=%ld, blocks=%d, n_ffts=%d\n", 
            avail_bytes, blocks, n_ffts);

    int half_n_cmplx = half_n_ffts * 512;
    unsigned long used_bytes = half_n_cmplx * 2 * sizeof(float2);

    cout << mpi_rank << ": testing " 
         << used_bytes/((double)1024*1024) << " MBs\n";

    // allocate host memory
    source = (float2*)malloc(used_bytes);
    result = (float2*)malloc(used_bytes);

    // alloc device memory
    allocDeviceBuffer(&work, used_bytes);

    // alloc gather buffer
    int* recvbuf = (int*)malloc(mpi_size*sizeof(int));
    
    // compute start and finish times
    time_t start = time(NULL);
    time_t finish = start + (time_t)(op.getOptionInt("time")*60);
    struct tm start_tm, finish_tm;
    localtime_r(&start, &start_tm);
    localtime_r(&finish, &finish_tm);
    if (mpi_rank == 0) {
        printf("start = %s", asctime(&start_tm));
        printf("finish = %s", asctime(&finish_tm));
    }
    
//.........这里部分代码省略.........

开发者ID:Poojachitral，项目名称:shoc，代码行数:101，代码来源:Stability.cpp

注：本文中的OptionParser::getOptionInt方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。