当前位置: 首页>>代码示例>>C++>>正文


C++ OptionParser::getOptionBool方法代码示例

本文整理汇总了C++中OptionParser::getOptionBool方法的典型用法代码示例。如果您正苦于以下问题:C++ OptionParser::getOptionBool方法的具体用法?C++ OptionParser::getOptionBool怎么用?C++ OptionParser::getOptionBool使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在OptionParser的用法示例。


在下文中一共展示了OptionParser::getOptionBool方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: RunBenchmark

// ****************************************************************************
// Function: RunBenchmark
//
// Purpose:
//   Measures the floating point capability of the device for a variety of 
//   combinations of arithmetic operations.
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Zhi Ying([email protected])
//             Jun Jin([email protected])
//
// Creation: May 23, 2011
//
// Modifications:
// 12/12/12 - Kyle Spafford - Code style and minor integration updates
//
// ****************************************************************************
void RunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    const bool verbose = op.getOptionBool("verbose");
    // Quiet == no progress bar.
    const bool quiet   = op.getOptionBool("quiet");
    const unsigned int passes = op.getOptionInt("passes");
    const int micdev = op.getOptionInt("target");

    double repeatF = 3;
    cout << "Adjust repeat factor = " << repeatF << "\n";

    // Initialize progress bar
    int totalRuns = 16*passes*2;
    ProgressBar pb(totalRuns);
    if (!verbose && !quiet) 
    {
        pb.Show(stdout);
    }

    RunTest<float>(resultDB, passes, verbose, quiet,
                   repeatF, pb, "-SP", micdev);
    RunTest<double>(resultDB, passes, verbose, quiet,
                    repeatF, pb, "-DP", micdev);

    if (!verbose) cout << endl;
}
开发者ID:optimus-prime,项目名称:shoc-mic,代码行数:47,代码来源:MaxFlops.cpp

示例2: if

void
RunBenchmark(cl::Device& devcpp,
                  cl::Context& ctxcpp,
                  cl::CommandQueue& queuecpp,
                  ResultDatabase &resultDB,
                  OptionParser &op)
{
    // convert from C++ bindings to C bindings
    // TODO propagate use of C++ bindings
    cl_device_id dev = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    if (getMaxWorkGroupSize(dev) < 64) {
        cout << "FFT requires MaxWorkGroupSize of at least 64" << endl;
        fillResultDB("SP-FFT", "MaxWorkGroupSize<64", op, resultDB);
        fillResultDB("DP-FFT", "MaxWorkGroupSize<64", op, resultDB);
        return;
    }
    
    bool has_dp = checkExtension(dev, "cl_khr_fp64") ||
        checkExtension(dev, "cl_amd_fp64");

    if (op.getOptionBool("dump-sp")) {
        dump<cplxflt>(op);
    }
    else if (op.getOptionBool("dump-dp")) {
        if (!has_dp) {
            cout << "dump-dp: no double precision support!\n";
            return;
        }
        dump<cplxdbl>(op);
    }
    else {
        // Always run single precision test
        runTest<cplxflt>("SP-FFT", dev, ctx, queue, resultDB, op);

        // If double precision is supported, run the DP test
        if (has_dp) {
            cout << "DP Supported\n";
            runTest<cplxdbl>("DP-FFT", dev, ctx, queue, resultDB, op);
        }
        else {
            cout << "DP Not Supported\n";
            fillResultDB("DP-FFT", "DP_Not_Supported", op, resultDB);
        }
    }
}
开发者ID:Poojachitral,项目名称:shoc,代码行数:48,代码来源:FFT.cpp

示例3: GPUSetup

// ****************************************************************************
// Function: GPUSetup
//
// Purpose:
//  do the necessary OpenCL setup for GPU part of the test
//
// Arguments:
//   op: the options parser / parameter database
//   mympirank: for printing errors in case of failure
//   mynoderank: this is typically the device ID (the mapping done in main)
//
// Returns: success/failure
//
// Creation: 2009
//
// Modifications:
//
// ****************************************************************************
//
int GPUSetup(OptionParser &op, int mympirank, int mynoderank)
{
    addBenchmarkSpecOptions(op);
    
    if (op.getOptionBool("infoDevices"))
    {
        OpenCLNodePlatformContainer ndc1;
        ndc1.Print (cout);
        return (0);
    }
    
    // The device option supports specifying more than one device
    int platform = op.getOptionInt("platform");
    int deviceIdx = mynoderank;
    if( deviceIdx >= op.getOptionVecInt( "device" ).size() )
    {
        std::ostringstream estr;
        estr << "Warning: not enough devices specified with --device flag for task "
            << mympirank
            << " ( node rank " << mynoderank
            << ") to claim its own device; forcing to use first device ";
        std::cerr << estr.str() << std::endl;
        deviceIdx = 0;
    }
    int device = op.getOptionVecInt("device")[deviceIdx];

    // Initialization
    _mpicontention_ocldev = new cl::Device( ListDevicesAndGetDevice(platform, device) );
    std::vector<cl::Device> ctxDevices;
    ctxDevices.push_back( *_mpicontention_ocldev );
    _mpicontention_ocldriver_ctx   = new cl::Context( ctxDevices );
    _mpicontention_ocldriver_queue = new cl::CommandQueue( *_mpicontention_ocldriver_ctx, *_mpicontention_ocldev, CL_QUEUE_PROFILING_ENABLE );
    _mpicontention_gpuop = op;
    return 0;
}
开发者ID:Poojachitral,项目名称:shoc,代码行数:54,代码来源:OCLDriver.cpp

示例4: if

void
RunBenchmark(ResultDatabase &resultDB, OptionParser &op)
{
    // Test to see if this device supports double precision
    cudaGetDevice(&fftDevice);
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, fftDevice);
    bool has_dp = (deviceProp.major == 1 && deviceProp.minor >= 3) ||
        (deviceProp.major >= 2);

    if (op.getOptionBool("dump-sp")) {
        dump<float2>(op);
    }
    else if (op.getOptionBool("dump-dp")) {
        if (!has_dp) {
            cout << "dump-dp: no double precision support!\n";
            return;
        }
        dump<double2>(op);
    }
    else {
        cout << "Running single precision test" << endl;
        runTest<float2>("SP-FFT", resultDB, op);
        if (has_dp) {
            cout << "Running double precision test" << endl;
            runTest<double2>("DP-FFT", resultDB, op);
        } 
        else {
            cout << "Skipping double precision test" << endl;
            char atts[32] = "DP_Not_Supported";
            // resultDB requires neg entry for every possible result
            int passes = op.getOptionInt("passes");
            for (int k=0; k<passes; k++) {
                resultDB.AddResult("DP-FFT" , atts, "GB/s", FLT_MAX);
                resultDB.AddResult("DP-FFT_PCIe" , atts, "GB/s", FLT_MAX);
                resultDB.AddResult("DP-FFT_Parity" , atts, "GB/s", FLT_MAX);
                resultDB.AddResult("DP-FFT-INV" , atts, "GB/s", FLT_MAX);
                resultDB.AddResult("DP-FFT-INV_PCIe" , atts, "GB/s", FLT_MAX);
                resultDB.AddResult("DP-FFT-INV_Parity" , atts, "GB/s", FLT_MAX);
            }
        }
    }
}
开发者ID:ashwinma,项目名称:mpiacc-contention-tests,代码行数:43,代码来源:FFT.cpp

示例5:

	void
RunBenchmark(OptionParser &op)
{
	// convert from C++ bindings to C bindings
	// TODO propagate use of C++ bindings



	if(op.getOptionBool("2D"))
		dump2D<cplxflt>(op);
	else
		dump1D<cplxflt>(op);

}
开发者ID:Sable,项目名称:Ostrich,代码行数:14,代码来源:fft.cpp

示例6: strrchr

void
RunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    const bool verbose = op.getOptionBool("verbose");

    if (verbose) // print MKL version info
    {
        static char mklver[200];
        char *p;
        MKL_Get_Version_String(mklver,sizeof(mklver));
        mklver[sizeof(mklver)-1] = 0;
        p = strrchr(mklver,' ');
        if (p) while (p[0]==' ' && p[1]==0) *p-- = 0;
        printf("SHOC FFT benchmark using MKL verison %s\n",mklver);
    }
    RunTest<cplxflt>("SP-FFT", resultDB, op);
    RunTest<cplxdbl>("DP-FFT", resultDB, op);
}
开发者ID:ellen-hl,项目名称:shoc-mic,代码行数:18,代码来源:FFT.cpp

示例7: InvalidArgValue

void
DoTest( const char* timerDesc, ResultDatabase& resultDB, OptionParser& opts )
{
    StencilFactory<T>* stdStencilFactory = NULL;
    Stencil<T>* stdStencil = NULL;
    StencilFactory<T>* testStencilFactory = NULL;
    Stencil<T>* testStencil = NULL;

    try
    {
#if defined(PARALLEL)
        stdStencilFactory = new MPIHostStencilFactory<T>;
        testStencilFactory = new MPICUDAStencilFactory<T>;
#else
        stdStencilFactory = new HostStencilFactory<T>;
        testStencilFactory = new CUDAStencilFactory<T>;
#endif // defined(PARALLEL)
        assert( (stdStencilFactory != NULL) && (testStencilFactory != NULL) );

        // do a sanity check on option values
        CheckOptions( opts );
        stdStencilFactory->CheckOptions( opts );
        testStencilFactory->CheckOptions( opts );

        // extract and validate options
        std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );
        if( arrayDims.size() != 2 )
        {
            cerr << "Dim size: " << arrayDims.size() << "\n";
            throw InvalidArgValue( "all overall dimensions must be positive" );
        }
        if (arrayDims[0] == 0) // User has not specified a custom size
        {
            int sizeClass = opts.getOptionInt("size");
            arrayDims = StencilFactory<T>::GetStandardProblemSize( sizeClass );
        }
        long int seed = (long)opts.getOptionInt( "seed" );
        bool beVerbose = opts.getOptionBool( "verbose" );
        unsigned int nIters = (unsigned int)opts.getOptionInt( "num-iters" );
        double valErrThreshold = (double)opts.getOptionFloat( "val-threshold" );
        unsigned int nValErrsToPrint = (unsigned int)opts.getOptionInt( "val-print-limit" );

#if defined(PARALLEL)
        unsigned int haloWidth = (unsigned int)opts.getOptionInt( "iters-per-exchange" );
#else
        unsigned int haloWidth = 1;
#endif // defined(PARALLEL)

        float haloVal = (float)opts.getOptionFloat( "haloVal" );

        // build a description of this experiment
        std::vector<long long> lDims = opts.getOptionVecInt( "lsize" );
        assert( lDims.size() == 2 );
        std::ostringstream experimentDescriptionStr;
        experimentDescriptionStr
            << nIters << ':'
            << arrayDims[0] << 'x' << arrayDims[1] << ':'
            << lDims[0] << 'x' << lDims[1];

        unsigned int nPasses = (unsigned int)opts.getOptionInt( "passes" );
        unsigned int nWarmupPasses = (unsigned int)opts.getOptionInt( "warmupPasses" );


        // compute the expected result on the host
        // or read it from a pre-existing file
        std::string matrixFilenameBase = (std::string)opts.getOptionString( "expMatrixFile" );
#if defined(PARALLEL)
        int cwrank;
        MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
        if( cwrank == 0 )
        {
#endif // defined(PARALLEL)
        if( !matrixFilenameBase.empty() )
        {
            std::cout << "\nReading expected stencil operation result from file for later comparison with CUDA output\n"
                << std::endl;
        }
        else
        {
            std::cout << "\nPerforming stencil operation on host for later comparison with CUDA output\n"
                << "Depending on host capabilities, this may take a while."
                << std::endl;
        }
#if defined(PARALLEL)
        }
#endif // defined(PARALLEL)
        Matrix2D<T> expected( arrayDims[0] + 2*haloWidth,
                            arrayDims[1] + 2*haloWidth );
        Initialize<T> init( seed, haloWidth, haloVal );

        bool haveExpectedData = false;
        if( ! matrixFilenameBase.empty() )
        {
            bool readOK = ReadMatrixFromFile( expected, GetMatrixFileName<T>( matrixFilenameBase ) );
            if( readOK )
            {

                if( (expected.GetNumRows() != arrayDims[0] + 2*haloWidth) ||
                    (expected.GetNumColumns() != arrayDims[1] + 2*haloWidth) )
                {
//.........这里部分代码省略.........
开发者ID:adityaatluri,项目名称:shoc-1,代码行数:101,代码来源:Stencil2Dmain.cpp

示例8: main

// ****************************************************************************
// Method:  main()
//
// Purpose:
//   serial and parallel main for OpenCL level0 benchmarks
//
// Arguments:
//   argc, argv
//
// Programmer:  SHOC Team
// Creation:    The Epoch
//
// Modifications:
//   Jeremy Meredith, Tue Jan 12 15:09:33 EST 2010
//   Changed the way device selection works.  It now defaults to the device
//   index corresponding to the process's rank within a node if no devices
//   are specified on the command command line, and otherwise, round-robins
//   the list of devices among the tasks.
//
//   Gabriel Marin, Tue Jun 01 15:38 EST 2010
//   Check that we have valid (not NULL) context and queue objects before
//   running the benchmarks. Errors inside CreateContextFromSingleDevice or
//   CreateCommandQueueForContextAndDevice were not propagated out to the main
//   program.
//
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cout << "MPI Task "<< rank << "/" << size - 1 << " starting....\n";
#endif

        OptionParser op;

        //Add shared options to the parser
        op.addOption("platform", OPT_INT, "0", "specify OpenCL platform to use",
                'p');
        op.addOption("device", OPT_VECINT, "", "specify device(s) to run on", 'd');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                "show info for available platforms and devices", 'i');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1 );
        }

        if (op.getOptionBool("infoDevices"))
        {
#define DEBUG_DEVICE_CONTAINER 0
#ifdef PARALLEL
            // execute following code only if I am the process of lowest
            // rank on this node
            NodeInfo NI;
            int mynoderank = NI.nodeRank();
            if (mynoderank==0)
            {
                int nlrrank, nlrsize;
                MPI_Comm nlrcomm = NI.getNLRComm();
                MPI_Comm_size(nlrcomm, &nlrsize);
                MPI_Comm_rank(nlrcomm, &nlrrank);

                OpenCLNodePlatformContainer ndc1;
                OpenCLMultiNodeContainer localMnc(ndc1);
                localMnc.doMerge (nlrrank, nlrsize, nlrcomm);
                if (rank==0)  // I am the global rank 0, print all configurations
                    localMnc.Print (cout);
            }
#else
            OpenCLNodePlatformContainer ndc1;
            ndc1.Print (cout);
#if DEBUG_DEVICE_CONTAINER
            OpenCLMultiNodeContainer mnc1(ndc1), mnc2;
            mnc1.Print (cout);
            ostringstream oss;
//.........这里部分代码省略.........
开发者ID:ManavA,项目名称:shoc,代码行数:101,代码来源:main.cpp

示例9: RunBenchmark

void RunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    const bool verbose = op.getOptionBool("verbose");

    // Sizes are in kb
    const int nSizes  = 17;
    int sizes[nSizes] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
        32768, 65536};
    
    long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;

    // Create host memory
    hostMem = (float*)_mm_malloc(numMaxFloats*sizeof(float),ALIGN);

    if(hostMem==NULL)
    {
        cerr << "Couldn't allocate CPU memory! \n";
        cerr << "Test failed." << endl;
        return;
    }
    // Initialize memory with some pattern.
    for (int i = 0; i < numMaxFloats; i++)
    {
        hostMem[i] = i % 77;
    }

    const unsigned int passes = op.getOptionInt("passes");
    int micdev = op.getOptionInt("target");

    // Allocate memory on the card
    #pragma offload target(mic:micdev) \
    nocopy(hostMem:length(numMaxFloats) alloc_if(1) free_if(0) align(ALIGN) ) 
    {
    }

    // Three passes, forward and backward both
    for (int pass = 0; pass < passes; pass++)
    {
        // Step through sizes forward on even passes and backward on odd
        for (int i = 0; i < nSizes; i++)
        {
            int sizeIndex;
            if ((pass % 2) == 0)
            {
                sizeIndex = i;
            }
            else
            {
                sizeIndex = (nSizes - 1) - i;
            }

            int nbytes = sizes[sizeIndex] * 1024;

            //  D->H test
            double start = curr_second();

            #pragma offload target(mic:micdev) \
            out(hostMem:length((1024*sizes[sizeIndex]/4)) \
            free_if(0) alloc_if(0)  )
            {
            }
            double t = curr_second()-start;

            if (verbose)
            {
                cerr << "Size " << sizes[sizeIndex] << "k took " << t <<
                    " sec\n";
            }

            double speed = (double(sizes[sizeIndex]) * 1024 / 
                    (1000. * 1000. * 1000.)) / t;
            char sizeStr[256];
            sprintf(sizeStr, "% 6dkB", sizes[sizeIndex]);
            resultDB.AddResult("ReadbackSpeed", sizeStr, "GB/sec", speed);
            resultDB.AddResult("ReadbackTime", sizeStr, "ms", t*1000);
        }
    }
    // Free memory allocated on the mic
    #pragma offload target(mic:micdev) \
    in(hostMem:length(numMaxFloats) alloc_if(0)  )
    {
    }

    // Cleanup
    _mm_free(hostMem);
}
开发者ID:ellen-hl,项目名称:shoc-mic,代码行数:86,代码来源:BusSpeedReadback.cpp

示例10: assert

void
DoTest( const char* timerDesc, ResultDatabase& resultDB, OptionParser& opts )
{
    StencilFactory<T>* stdStencilFactory = NULL;
    Stencil<T>* stdStencil = NULL;
    StencilFactory<T>* testStencilFactory = NULL;
    Stencil<T>* testStencil = NULL;

    //try
    {
        stdStencilFactory = new HostStencilFactory<T>;
        testStencilFactory = new MICStencilFactory<T>;
        assert( (stdStencilFactory != NULL) && (testStencilFactory != NULL) );

        // do a sanity check on option values
        CheckOptions( opts );
        stdStencilFactory->CheckOptions( opts );
        testStencilFactory->CheckOptions( opts );

        // extract and validate options
        std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );
        if( arrayDims.size() != 2 )
        {
            cerr << "Dim size: " << arrayDims.size() << "\n";
            //throw InvalidArgValue( "all overall dimensions must be positive" );
        }
        if (arrayDims[0] == 0) // User has not specified a custom size
        {
            const int probSizes[4] = { 768, 1408, 2048, 4096 };
            int sizeClass = opts.getOptionInt("size");
            if (!(sizeClass >= 0 && sizeClass < 5))
            {
                //throw InvalidArgValue( "Size class must be between 1-4" );
            }
            arrayDims[0] = arrayDims[1] =probSizes[sizeClass - 1];
        }

        long int seed = (long)opts.getOptionInt( "seed" );
        bool beVerbose = opts.getOptionBool( "verbose" );
        unsigned int nIters = (unsigned int)opts.getOptionInt( "num-iters" );
        double valErrThreshold = (double)opts.getOptionFloat( "val-threshold" );
        unsigned int nValErrsToPrint = (unsigned int)opts.getOptionInt( "val-print-limit" );

#if defined(PARALLEL)
        unsigned int haloWidth = (unsigned int)opts.getOptionInt( "iters-per-exchange" );
#else
        unsigned int haloWidth = 1;
#endif // defined(PARALLEL)

        float haloVal = (float)opts.getOptionFloat( "haloVal" );

        // build a description of this experiment
        std::ostringstream experimentDescriptionStr;
        experimentDescriptionStr 
            << nIters << ':'
            << arrayDims[0] << 'x' << arrayDims[1] << ':'
            << LROWS << 'x' << LCOLS;

        unsigned int nPasses =(unsigned int)opts.getOptionInt( "passes" );
         unsigned long npts = (arrayDims[0] + 2*haloWidth - 2) * 
                                     (arrayDims[1] + 2*haloWidth - 2); 

unsigned long nflops = npts * 11 * nIters;
cout<<"flops are = "<<nflops<<endl;

        // compute the expected result on the host
#if defined(PARALLEL)
        int cwrank;
        MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
        if( cwrank == 0 )
        {
#endif // defined(PARALLEL)
        std::cout << "\nPerforming stencil operation on host for later comparison with MIC output\n"
            << "Depending on host capabilities, this may take a while."
            << std::endl;
#if defined(PARALLEL)
        }
#endif // defined(PARALLEL)
        Matrix2D<T> exp( arrayDims[0] + 2*haloWidth, 
                            arrayDims[1] + 2*haloWidth );
        Initialize<T> init( seed,
                        haloWidth,
                        haloVal );	

        init( exp );
        if( beVerbose )
        {
            std::cout << "initial state:\n" << exp << std::endl;
        }
        Stencil<T>* stdStencil = stdStencilFactory->BuildStencil( opts );
        (*stdStencil)( exp, nIters );
        if( beVerbose )
        {
            std::cout << "expected result:\n" << exp << std::endl;
        }
	

        // compute the result on the MIC device
        Matrix2D<T> data( arrayDims[0] + 2*haloWidth, 
                            arrayDims[1] + 2*haloWidth );
//.........这里部分代码省略.........
开发者ID:optimus-prime,项目名称:shoc-mic,代码行数:101,代码来源:Stencil2Dmain.cpp

示例11: RunBenchmark

// ****************************************************************************
// Function: RunBenchmark
//
// Purpose:
//   Executes a series of arithmetic benchmarks for OpenCL devices. 
//   OpenCL kernels are auto-generated based on the values in the
//   _benchmark_type structures.
//   The benchmark tests throughput for add, multiply, multiply-add and 
//   multiply+multiply-add series of operations, for 1, 2, 4 and 8
//   independent streams..
//
// Arguments:
//   ctx: the opencl context to use for the benchmark
//   queue: the opencl command queue to issue commands to
//   resultDB: results from the benchmark are stored in this db
//   op: the options parser (contains input parameters)
//
// Returns:  nothing
//
// Programmer: Gabriel Marin
// Creation: June 26, 2009
//
// Modifications:
//
// ****************************************************************************
void RunBenchmark(cl::Device& devcpp,
                  cl::Context& ctxcpp,
                  cl::CommandQueue& queuecpp,
                  ResultDatabase &resultDB,
                  OptionParser &op)
{
    // convert from C++ bindings to C bindings
    // TODO propagate use of C++ bindings
    cl_device_id id = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    int npasses  = op.getOptionInt("passes");
    bool verbose = op.getOptionBool("verbose");
    bool quiet = op.getOptionBool("quiet");


    int err;
    cl_mem mem1;
    float *hostMem, *hostMem2;
    size_t maxGroupSize = 1;
    size_t localWorkSize = 1;

    // Seed the random number generator
    srand48(8650341L);
    
    // To prevent this benchmark from taking too long to run, we 
    // calibrate how many repetitions of each test to execute. To do this we
    // run one pass through a multiply-add benchmark and then adjust
    // the repeat factor based on runtime. Use MulMAdd4 for this.
    int aIdx = 0;
    float repeatF = 1.0f;
    // Find the index of the MAdd4 benchmark
    while ((aTests!=0) && (aTests[aIdx].name!=0) && 
        strcmp(aTests[aIdx].name,"MAdd4"))
    {   
       aIdx += 1;
    }
    if (aTests && aTests[aIdx].name)  // we found a benchmark with that name
    {
       struct _benchmark_type temp = aTests[aIdx];
       // Limit to one repetition
       temp.numRepeats = 10;

       // Kernel will be generated into this stream
       ostringstream oss;
       generateKernel (oss, temp, "float", "");
       std::string kernelCode(oss.str());
       
       // Allocate host memory
       int halfNumFloatsMax = temp.halfBufSizeMax*1024/4;
       int numFloatsMax = 2*halfNumFloatsMax;
       hostMem = new float[numFloatsMax];
       hostMem2 = new float[numFloatsMax];
       
       // Allocate device memory
       mem1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 
                                 sizeof(float)*numFloatsMax, NULL, &err);
       CL_CHECK_ERROR(err);

       err = clEnqueueWriteBuffer(queue, mem1, true, 0,
                               numFloatsMax*sizeof(float), hostMem,
                               0, NULL, NULL);
       CL_CHECK_ERROR(err);
       
       // Create the kernel program
       const char* progSource[] = {kernelCode.c_str()};
       cl_program prog = clCreateProgramWithSource(ctx, 1, progSource, 
                                                   NULL, &err);
       CL_CHECK_ERROR(err);
       
       // Compile the kernel
       err = clBuildProgram(prog, 0, NULL, opts, NULL, NULL);
       // Compile the kernel
       CL_CHECK_ERROR(err);
//.........这里部分代码省略.........
开发者ID:ashwinma,项目名称:mpiacc-contention-tests,代码行数:101,代码来源:MaxFlops.cpp

示例12: RunTest

void RunTest(const string& name, ResultDatabase &resultDB, OptionParser &op)
{
    static __declspec(target(mic)) T2 *source;
    int chk;
    unsigned long bytes = 0;
    const int micdev = op.getOptionInt("target");
    const bool verbose = op.getOptionBool("verbose");
    
    // Get problem size
    if (op.getOptionInt("MB") == 0) {
        int probSizes[4] = { 1, 8, 96, 256 };
        int sizeIndex = op.getOptionInt("size")-1;
        if (sizeIndex < 0 || sizeIndex >= 4) {
            cerr << "Invalid size index specified\n";
            exit(-1);
        }
        bytes = probSizes[sizeIndex];
    } else {
        bytes = op.getOptionInt("MB");
    }
    // Convert to MiB
    bytes *= 1024 * 1024;

    int passes = op.getOptionInt("passes");

    // The size of the transform computed is fixed at 512 complex elements
    int fftsz = 512;        
    int N = (bytes)/sizeof(T2);
    int n_ffts = N/fftsz;

    // Allocate space (aligned)
    source = (T2*) MKL_malloc(bytes,  4096);

    //allocate buffers and create FFT plans
    #pragma offload target(mic:micdev) in(fftsz, n_ffts)       \
                                       nocopy(source:length(N) \
                                       align(4096) alloc_if(1) free_if(0))
    {
        forward((T2*)NULL, fftsz, n_ffts);
        inverse((T2*)NULL, fftsz, n_ffts);
    }

    const char *sizeStr;
    stringstream ss;
    ss << "N=" << (long)N;
    sizeStr = strdup(ss.str().c_str());

    for(int k = 0; k < passes; k++)
    {
        init<T2>( source, fftsz, n_ffts );
        // Warmup
        if (k==0)
        {
            #pragma offload target(mic:micdev) in(fftsz, n_ffts)   \
                                               in(source:length(N) \
                                               alloc_if(0)  free_if(0))
            {
                forward(source, fftsz, n_ffts);
            }
        }

        // Time forward fft with data transfer over PCIe
        double time_fwd_pcie = -curr_second();
        
        // Using in rather than inout to be consistent with CUDA version.
        #pragma offload target(mic:micdev) in(fftsz, n_ffts)               \
                                           in(source:length(N) alloc_if(0) \
                                           free_if(0))
        {
            forward(source, fftsz, n_ffts);
        }
        time_fwd_pcie += curr_second();
        #pragma offload target(mic:micdev) out(source:length(N) alloc_if(0) \
                free_if(0))
        {
        }

        // Time inverse fft with data transfer over PCIe
        double time_inv_pcie = -curr_second();
        #pragma offload target(mic:micdev) in(fftsz, n_ffts)   \
                                           in(source:length(N) \
                                           alloc_if(0)  free_if(0))
        {
            inverse(source, fftsz, n_ffts);
        }
        time_inv_pcie += curr_second();

        #pragma offload target(mic:micdev) out(source:length(N) alloc_if(0) \
            free_if(0))
        {}

        // Check result
        #pragma offload target(mic:micdev) in(fftsz,n_ffts) nocopy(source) \
            out(chk)
        {
            chk = checkDiff(source, fftsz, n_ffts);
        }
        if (verbose || chk)
        {
            cout << "Test " << k << ((chk) ? ": Failed\n" : ": Passed\n");
//.........这里部分代码省略.........
开发者ID:ellen-hl,项目名称:shoc-mic,代码行数:101,代码来源:FFT.cpp

示例13: main

// ****************************************************************************
// Function: main
//
// Purpose:
//   The main function takes care of initialization (device and MPI),  then
//   performs the benchmark and prints results.
//
// Arguments:
//
//
// Programmer: Jeremy Meredith
// Creation:
//
// Modifications:
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;
    bool noprompt = false;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cerr << "MPI Task " << rank << "/" << size - 1 << " starting....\n";
#endif

        // Get args
        OptionParser op;
       
        //Add shared options to the parser
        op.addOption("device", OPT_VECINT, "0",
                "specify device(s) to run on", 'd');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_INT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                "show info for available platforms and devices", 'i');
        op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');
#ifdef _WIN32
        op.addOption("noprompt", OPT_BOOL, "", "don't wait for prompt at program exit");
#endif

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1);
        }
        
        bool verbose = op.getOptionBool("verbose");
        bool infoDev = op.getOptionBool("infoDevices");
#ifdef _WIN32
        noprompt = op.getOptionBool("noprompt");
#endif

        int device;
#ifdef PARALLEL
        NodeInfo ni;
        int myNodeRank = ni.nodeRank();
        vector<long long> deviceVec = op.getOptionVecInt("device");
        if (myNodeRank >= deviceVec.size()) {
            // Default is for task i to test device i
            device = myNodeRank;
        } else {
            device = deviceVec[myNodeRank];
        }
#else
        device = op.getOptionVecInt("device")[0];
#endif
        int deviceCount;
        cudaGetDeviceCount(&deviceCount);
        if (device >= deviceCount) {
            cerr << "Warning: device index: " << device <<
            " out of range, defaulting to device 0.\n";
            device = 0;
        }

        // Initialization
        EnumerateDevicesAndChoose(device, infoDev);
        if( infoDev )
        {
            return 0;
        }
        ResultDatabase resultDB;
//.........这里部分代码省略.........
开发者ID:Poojachitral,项目名称:shoc,代码行数:101,代码来源:main.cpp

示例14: if

void 
init(OptionParser& op, bool _do_dp)
{
    cl_int err;

    do_dp = _do_dp;
    
    if (!fftCtx) {
        // first get the device
        int device, platform = op.getOptionInt("platform");
        if (op.getOptionVecInt("device").size() > 0) {
            device = op.getOptionVecInt("device")[0];
        }
        else {
            device = 0;
        }
        fftDev = ListDevicesAndGetDevice(platform, device);

        // now get the context
        fftCtx = clCreateContext(NULL, 1, &fftDev, NULL, NULL, &err);
        CL_CHECK_ERROR(err);
    }

    if (!fftQueue) {
        // get a queue
        fftQueue = clCreateCommandQueue(fftCtx, fftDev, CL_QUEUE_PROFILING_ENABLE,
                                        &err);
        CL_CHECK_ERROR(err);
    }
    
    // create the program...
    fftProg = clCreateProgramWithSource(fftCtx, 1, &cl_source_fft, NULL, &err);
    CL_CHECK_ERROR(err);
    
    // ...and build it
    string args = " -cl-mad-enable ";
    if (op.getOptionBool("use-native")) {
        args += " -cl-fast-relaxed-math ";
    }
    if (!do_dp) {
        args += " -DSINGLE_PRECISION ";
    }
    else if (checkExtension(fftDev, "cl_khr_fp64")) {
        args += " -DK_DOUBLE_PRECISION ";
    }
    else if (checkExtension(fftDev, "cl_amd_fp64")) {
        args += " -DAMD_DOUBLE_PRECISION ";
    }
    err = clBuildProgram(fftProg, 0, NULL, args.c_str(), NULL, NULL);
    {
        char* log = NULL;
        size_t bytesRequired = 0;
        err = clGetProgramBuildInfo(fftProg, 
                                    fftDev, 
                                    CL_PROGRAM_BUILD_LOG,
                                    0,
                                    NULL,
                                    &bytesRequired );
        log = (char*)malloc( bytesRequired + 1 );
        err = clGetProgramBuildInfo(fftProg, 
                                    fftDev, 
                                    CL_PROGRAM_BUILD_LOG,
                                    bytesRequired,
                                    log,
                                    NULL );
        std::cout << log << std::endl;
        free( log );
    }
    if (err != CL_SUCCESS) {
        char log[50000];
        size_t retsize = 0;
        err = clGetProgramBuildInfo(fftProg, fftDev, CL_PROGRAM_BUILD_LOG,
                                    50000*sizeof(char),  log, &retsize);
        CL_CHECK_ERROR(err);
        cout << "Retsize: " << retsize << endl;
        cout << "Log: " << log << endl;
        dumpPTXCode(fftCtx, fftProg, "oclFFT");
        exit(-1);
    }
    else {
        // dumpPTXCode(fftCtx, fftProg, "oclFFT");
    }

    // Create kernel for forward FFT
    fftKrnl = clCreateKernel(fftProg, "fft1D_512", &err);
    CL_CHECK_ERROR(err);
    // Create kernel for inverse FFT
    ifftKrnl = clCreateKernel(fftProg, "ifft1D_512", &err);
    CL_CHECK_ERROR(err);
    // Create kernel for check
    chkKrnl = clCreateKernel(fftProg, "chk1D_512", &err);
    CL_CHECK_ERROR(err);
}
开发者ID:TakayukiSakai,项目名称:shoc,代码行数:93,代码来源:fftlib.cpp

示例15: RunBenchmark

//  Modifications:
//    Jeremy Meredith, Wed Dec  1 17:05:27 EST 2010
//    Added calculation of latency estimate.
//
void RunBenchmark(cl::Device& devcpp,
                  cl::Context& ctxcpp,
                  cl::CommandQueue& queuecpp,
                  ResultDatabase &resultDB,
                  OptionParser &op)
{
    // convert from C++ bindings to C bindings
    // TODO propagate use of C++ bindings
    cl_device_id id = devcpp();
    cl_context ctx = ctxcpp();
    cl_command_queue queue = queuecpp();

    bool verbose = op.getOptionBool("verbose");
    bool pinned = !op.getOptionBool("nopinned");
    int  npasses = op.getOptionInt("passes");
    const bool waitForEvents = true;

    // Sizes are in kb
    int nSizes  = 20;
    int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
		     32768,65536,131072,262144,524288};

    // Max sure we don't surpass the OpenCL limit.
    cl_long maxAllocSizeBytes = 0;
    clGetDeviceInfo(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                    sizeof(cl_long), &maxAllocSizeBytes, NULL);
    while (sizes[nSizes-1]*1024 > 0.90 * maxAllocSizeBytes)
    {
        --nSizes;
        if (verbose) cout << " - dropping allocation size to keep under reported limit.\n";
        if (nSizes < 1)
        {
            cerr << "Error: OpenCL reported a max allocation size less than 1kB.\b";
            return;
        }
    }

    // Create some host memory pattern
    if (verbose) cout << ">> creating host mem pattern\n";
    int err;
    float *hostMem;
    cl_mem hostMemObj;
    long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
    if (pinned)
    {
	hostMemObj = clCreateBuffer(ctx,
				    CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
				    sizeof(float)*numMaxFloats, NULL, &err);
        if (err == CL_SUCCESS)
        {
            hostMem = (float*)clEnqueueMapBuffer(queue, hostMemObj, true,
                                                 CL_MAP_READ|CL_MAP_WRITE,
                                                 0,sizeof(float)*numMaxFloats,0,
                                                 NULL,NULL,&err);
        }
	while (err != CL_SUCCESS)
	{
	    // drop the size and try again
	    if (verbose) cout << " - dropping size allocating pinned mem\n";
	    --nSizes;
	    if (nSizes < 1)
	    {
		cerr << "Error: Couldn't allocated any pinned buffer\n";
		return;
	    }
	    numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
	    hostMemObj = clCreateBuffer(ctx,
					CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
					sizeof(float)*numMaxFloats, NULL, &err);
            if (err == CL_SUCCESS)
            {
                hostMem = (float*)clEnqueueMapBuffer(queue, hostMemObj, true,
                                                     CL_MAP_READ|CL_MAP_WRITE,
                                                     0,sizeof(float)*numMaxFloats,0,
                                                     NULL,NULL,&err);
            }
	}
    }
    else
    {
        hostMem = new float[numMaxFloats];
    }
    for (int i=0; i<numMaxFloats; i++)
        hostMem[i] = i % 77;

    // Allocate some device memory
    if (verbose) cout << ">> allocating device mem\n";
    cl_mem mem1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
                                 sizeof(float)*numMaxFloats, NULL, &err);
    while (err != CL_SUCCESS)
    {
	// drop the size and try again
	if (verbose) cout << " - dropping size allocating device mem\n";
	--nSizes;
	if (nSizes < 1)
	{
//.........这里部分代码省略.........
开发者ID:dylanzika,项目名称:shoc,代码行数:101,代码来源:BusSpeedDownload.cpp


注:本文中的OptionParser::getOptionBool方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。