Browse Source

initial commit

master
Georg Hopp 10 years ago
commit
c3bddbc632
  1. 6
      .gitignore
  2. 12
      Makefile
  3. 39
      README.md
  4. 131
      cl-demo.c
  5. 741
      cl-helper.c
  6. 250
      cl-helper.h
  7. 7
      print-devices.c
  8. 7
      set-governor
  9. 2
      show-clock-freq
  10. 54
      timing.h
  11. 12
      vec-add-soln.cl

6
.gitignore

@ -0,0 +1,6 @@
.*.sw[op]
*~
a.out
print-devices
cl-demo
*.o

12
Makefile

@ -0,0 +1,12 @@
# Programs built by the default target.
EXECUTABLES = cl-demo print-devices
all: $(EXECUTABLES)
# Each program is compiled in one step together with cl-helper.c and linked
# against librt and libOpenCL.
print-devices: print-devices.c cl-helper.c
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL
cl-demo: cl-demo.c cl-helper.c
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL
# Remove the built programs and any object files.
clean:
@rm -f $(EXECUTABLES) *.o

39
README.md

@ -0,0 +1,39 @@
# OpenCL Howto
Code snippets taken from
[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo)
## Description
This is just some more playing around with OpenCL in an attempt to learn a bit
about it.
## Requirements
You need OpenCL-capable hardware and a corresponding OpenCL library that
exposes the OpenCL API. I tested this on an Intel GPU (Intel Corporation
Haswell-ULT Integrated Graphics Controller (rev 09)) with the
[beignet](https://www.freedesktop.org/wiki/Software/Beignet/)
open source library.
## License
MIT License
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to
> deal in the Software without restriction, including without limitation the
> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> sell copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in
> all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE SOFTWARE.

131
cl-demo.c

@ -0,0 +1,131 @@
#include "timing.h"
#include "cl-helper.h"
int main(int argc, char **argv)
{
if (argc != 3)
{
fprintf(stderr, "need two arguments!\n");
abort();
}
const cl_long n = atol(argv[1]);
const int ntrips = atoi(argv[2]);
cl_context ctx;
cl_command_queue queue;
create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0);
print_device_info_from_queue(queue);
// --------------------------------------------------------------------------
// load kernels
// --------------------------------------------------------------------------
char *knl_text = read_file("vec-add-soln.cl");
cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL);
free(knl_text);
// --------------------------------------------------------------------------
// allocate and initialize CPU memory
// --------------------------------------------------------------------------
float *a = (float *) malloc(sizeof(float) * n);
if (!a) { perror("alloc x"); abort(); }
float *b = (float *) malloc(sizeof(float) * n);
if (!b) { perror("alloc y"); abort(); }
float *c = (float *) malloc(sizeof(float) * n);
if (!c) { perror("alloc z"); abort(); }
for (size_t i = 0; i < n; ++i)
{
a[i] = i;
b[i] = 2*i;
}
// --------------------------------------------------------------------------
// allocate device memory
// --------------------------------------------------------------------------
cl_int status;
cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
sizeof(float) * n, 0, &status);
CHECK_CL_ERROR(status, "clCreateBuffer");
cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
sizeof(float) * n, 0, &status);
CHECK_CL_ERROR(status, "clCreateBuffer");
cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
sizeof(float) * n, 0, &status);
CHECK_CL_ERROR(status, "clCreateBuffer");
// --------------------------------------------------------------------------
// transfer to device
// --------------------------------------------------------------------------
CALL_CL_GUARDED(clEnqueueWriteBuffer, (
queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0,
n * sizeof(float), a,
0, NULL, NULL));
CALL_CL_GUARDED(clEnqueueWriteBuffer, (
queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0,
n * sizeof(float), b,
0, NULL, NULL));
// --------------------------------------------------------------------------
// run code on device
// --------------------------------------------------------------------------
CALL_CL_GUARDED(clFinish, (queue));
timestamp_type time1, time2;
get_timestamp(&time1);
for (int trip = 0; trip < ntrips; ++trip)
{
SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n);
size_t ldim[] = { 32 };
size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] };
CALL_CL_GUARDED(clEnqueueNDRangeKernel,
(queue, knl,
/*dimensions*/ 1, NULL, gdim, ldim,
0, NULL, NULL));
}
CALL_CL_GUARDED(clFinish, (queue));
get_timestamp(&time2);
double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips;
printf("%f s\n", elapsed);
printf("%f GB/s\n",
3*n*sizeof(float)/1e9/elapsed);
// --------------------------------------------------------------------------
// transfer back & check
// --------------------------------------------------------------------------
CALL_CL_GUARDED(clEnqueueReadBuffer, (
queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0,
n * sizeof(float), c,
0, NULL, NULL));
for (size_t i = 0; i < n; ++i)
if (c[i] != 3*i)
{
printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i);
abort();
}
puts("GOOD");
// --------------------------------------------------------------------------
// clean up
// --------------------------------------------------------------------------
CALL_CL_GUARDED(clReleaseMemObject, (buf_a));
CALL_CL_GUARDED(clReleaseMemObject, (buf_b));
CALL_CL_GUARDED(clReleaseMemObject, (buf_c));
CALL_CL_GUARDED(clReleaseKernel, (knl));
CALL_CL_GUARDED(clReleaseCommandQueue, (queue));
CALL_CL_GUARDED(clReleaseContext, (ctx));
return 0;
}

741
cl-helper.c

@ -0,0 +1,741 @@
/*
* Copyright (c) 2010 Andreas Kloeckner
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "cl-helper.h"
#include <string.h>
#include <stdbool.h>
#define MAX_NAME_LEN 1000
/* Map the OpenCL status code 'e' to a short English description.
 *
 * Returns a pointer to a string literal. Codes that are not listed (or whose
 * constant is compiled out by the guards below) yield
 * "invalid/unknown error code".
 */
const char *cl_error_to_str(cl_int e)
{
  static const struct { cl_int code; const char *text; } known_errors[] = {
    { CL_SUCCESS, "success" },
    { CL_DEVICE_NOT_FOUND, "device not found" },
    { CL_DEVICE_NOT_AVAILABLE, "device not available" },
#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001)
    { CL_COMPILER_NOT_AVAILABLE, "device compiler not available" },
#endif
    { CL_MEM_OBJECT_ALLOCATION_FAILURE, "mem object allocation failure" },
    { CL_OUT_OF_RESOURCES, "out of resources" },
    { CL_OUT_OF_HOST_MEMORY, "out of host memory" },
    { CL_PROFILING_INFO_NOT_AVAILABLE, "profiling info not available" },
    { CL_MEM_COPY_OVERLAP, "mem copy overlap" },
    { CL_IMAGE_FORMAT_MISMATCH, "image format mismatch" },
    { CL_IMAGE_FORMAT_NOT_SUPPORTED, "image format not supported" },
    { CL_BUILD_PROGRAM_FAILURE, "build program failure" },
    { CL_MAP_FAILURE, "map failure" },
    { CL_INVALID_VALUE, "invalid value" },
    { CL_INVALID_DEVICE_TYPE, "invalid device type" },
    { CL_INVALID_PLATFORM, "invalid platform" },
    { CL_INVALID_DEVICE, "invalid device" },
    { CL_INVALID_CONTEXT, "invalid context" },
    { CL_INVALID_QUEUE_PROPERTIES, "invalid queue properties" },
    { CL_INVALID_COMMAND_QUEUE, "invalid command queue" },
    { CL_INVALID_HOST_PTR, "invalid host ptr" },
    { CL_INVALID_MEM_OBJECT, "invalid mem object" },
    { CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "invalid image format descriptor" },
    { CL_INVALID_IMAGE_SIZE, "invalid image size" },
    { CL_INVALID_SAMPLER, "invalid sampler" },
    { CL_INVALID_BINARY, "invalid binary" },
    { CL_INVALID_BUILD_OPTIONS, "invalid build options" },
    { CL_INVALID_PROGRAM, "invalid program" },
    { CL_INVALID_PROGRAM_EXECUTABLE, "invalid program executable" },
    { CL_INVALID_KERNEL_NAME, "invalid kernel name" },
    { CL_INVALID_KERNEL_DEFINITION, "invalid kernel definition" },
    { CL_INVALID_KERNEL, "invalid kernel" },
    { CL_INVALID_ARG_INDEX, "invalid arg index" },
    { CL_INVALID_ARG_VALUE, "invalid arg value" },
    { CL_INVALID_ARG_SIZE, "invalid arg size" },
    { CL_INVALID_KERNEL_ARGS, "invalid kernel args" },
    { CL_INVALID_WORK_DIMENSION, "invalid work dimension" },
    { CL_INVALID_WORK_GROUP_SIZE, "invalid work group size" },
    { CL_INVALID_WORK_ITEM_SIZE, "invalid work item size" },
    { CL_INVALID_GLOBAL_OFFSET, "invalid global offset" },
    { CL_INVALID_EVENT_WAIT_LIST, "invalid event wait list" },
    { CL_INVALID_EVENT, "invalid event" },
    { CL_INVALID_OPERATION, "invalid operation" },
    { CL_INVALID_GL_OBJECT, "invalid gl object" },
    { CL_INVALID_BUFFER_SIZE, "invalid buffer size" },
    { CL_INVALID_MIP_LEVEL, "invalid mip level" },
#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
    { CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, "invalid gl sharegroup reference number" },
#endif
#ifdef CL_VERSION_1_1
    { CL_MISALIGNED_SUB_BUFFER_OFFSET, "misaligned sub-buffer offset" },
    { CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "exec status error for events in wait list" },
    { CL_INVALID_GLOBAL_WORK_SIZE, "invalid global work size" },
#endif
  };
  const size_t known_count = sizeof known_errors / sizeof known_errors[0];

  // linear scan; the first entry whose code equals 'e' wins
  for (size_t k = 0; k < known_count; ++k)
  {
    if (known_errors[k].code == e)
      return known_errors[k].text;
  }

  return "invalid/unknown error code";
}
void print_platforms_devices()
{
// get number of platforms
cl_uint plat_count;
CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));
// allocate memory, get list of platforms
cl_platform_id *platforms =
(cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
CHECK_SYS_ERROR(!platforms, "allocating platform array");
CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));
// iterate over platforms
for (cl_uint i = 0; i < plat_count; ++i)
{
// get platform vendor name
char buf[MAX_NAME_LEN];
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
sizeof(buf), buf, NULL));
printf("platform %d: vendor '%s'\n", i, buf);
// get number of devices in platform
cl_uint dev_count;
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
0, NULL, &dev_count));
cl_device_id *devices =
(cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
CHECK_SYS_ERROR(!devices, "allocating device array");
// get list of devices in platform
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
dev_count, devices, NULL));
// iterate over devices
for (cl_uint j = 0; j < dev_count; ++j)
{
char buf[MAX_NAME_LEN];
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
sizeof(buf), buf, NULL));
printf(" device %d: '%s'\n", j, buf);
}
free(devices);
}
free(platforms);
}
/* Read one line from stdin into a freshly allocated, NUL-terminated string.
 *
 * Reading stops after the first '\n' (which is kept in the result) or at
 * end of input. If end of input is hit before any character was read, an
 * empty string is returned. The caller owns the result and must free() it.
 *
 * Returns NULL if memory cannot be allocated.
 *
 * Based on http://stackoverflow.com/a/314422/1148634, with the growth
 * bookkeeping redone: this copy of the loop reset its remaining-space
 * counter to the whole new capacity after a realloc, which let long lines
 * run past the end of the buffer.
 */
char *read_a_line(void)
{
  enum { INITIAL_CAPACITY = 128 };

  size_t capacity = INITIAL_CAPACITY;
  size_t length = 0;
  char *buffer = (char *) malloc(capacity);

  if (buffer == NULL)
    return NULL;

  for (;;)
  {
    int c = fgetc(stdin);
    if (c == EOF)
      break;

    // always keep room for this character plus the terminating NUL
    if (length + 1 >= capacity)
    {
      if (capacity > ((size_t) -1) / 2)
      {
        free(buffer);
        return NULL;
      }

      size_t new_capacity = capacity * 2;
      char *grown = (char *) realloc(buffer, new_capacity);
      if (grown == NULL)
      {
        free(buffer);
        return NULL;
      }
      buffer = grown;
      capacity = new_capacity;
    }

    buffer[length++] = (char) c;
    if (c == '\n')
      break;
  }

  buffer[length] = '\0';
  return buffer;
}
/* Sentinel for the plat_name/dev_name arguments of create_context_on. It is
 * recognized by comparing the pointer itself, not the string contents. */
const char *CHOOSE_INTERACTIVELY = "INTERACTIVE";
/* Function-like minimum/maximum. Each argument may be evaluated twice, so
 * avoid passing expressions with side effects. */
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
/* Print the "Enter choice: " prompt, read one line from stdin and turn it
 * into a menu index in [0, count-1]. Non-numeric input selects entry 0 and
 * out-of-range input is clamped. Aborts if no line can be read or if the
 * menu is empty.
 */
static cl_uint prompt_for_menu_index(cl_uint count)
{
  printf("Enter choice: ");
  fflush(stdout);

  char *sel = read_a_line();
  if (!sel)
  {
    fprintf(stderr, "error reading line from stdin\n");
    abort();
  }
  long choice = strtol(sel, NULL, 10);
  free(sel);

  if (count == 0)
  {
    fputs("create_context_on: nothing to choose from.\n", stderr);
    abort();
  }
  if (choice < 0)
    return 0;
  if ((unsigned long) choice >= count)
    return count - 1;
  return (cl_uint) choice;
}

/* Create an OpenCL context on one device and, if 'queue' is not NULL, a
 * command queue on it.
 *
 * plat_name         substring to look for in the platform vendor string,
 *                   NULL for "any", or CHOOSE_INTERACTIVELY for a menu
 * dev_name          substring to look for in the device name, NULL for
 *                   "any", or CHOOSE_INTERACTIVELY for a menu
 * idx               number of matching devices to skip before taking one
 * ctx               receives the new context
 * queue             receives the new command queue; may be NULL
 * enable_profiling  non-zero requests CL_QUEUE_PROFILING_ENABLE
 *
 * An entry picked from a menu is matched by its menu position rather than
 * by name, so platforms or devices with identical (or substring) names can
 * be told apart. When CL_HELPER_FORCE_INTERACTIVE is defined, both menus
 * are always shown.
 *
 * This function does not return on failure: OpenCL errors, allocation
 * failures and "no matching device" all abort the process.
 */
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx,
    cl_context *ctx, cl_command_queue *queue, int enable_profiling)
{
  char dev_sel_buf[MAX_NAME_LEN];

  // menu selections, remembered by position
  bool plat_picked = false;
  cl_uint plat_pick = 0;
  bool dev_picked = false;
  cl_uint dev_pick = 0;
  cl_uint dev_pick_plat = 0;   // platform whose device menu produced dev_pick

  // get number of platforms
  cl_uint plat_count;
  CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));

  // allocate memory, get list of platform handles
  cl_platform_id *platforms =
    (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  CHECK_SYS_ERROR(!platforms, "allocating platform array");
  CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));

  // print menu, if requested
#ifndef CL_HELPER_FORCE_INTERACTIVE
  if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
#endif
  {
    puts("Choose platform:");
    for (cl_uint i = 0; i < plat_count; ++i)
    {
      char vendor[MAX_NAME_LEN];
      CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
            sizeof(vendor), vendor, NULL));
      printf("[%u] %s\n", i, vendor);
    }

    plat_pick = prompt_for_menu_index(plat_count);
    plat_picked = true;
  }

  // iterate over platforms
  for (cl_uint i = 0; i < plat_count; ++i)
  {
    // get platform name
    char plat_vendor[MAX_NAME_LEN];
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
          sizeof(plat_vendor), plat_vendor, NULL));

    // does it match? A menu pick is matched by position, anything else by
    // substring.
    bool plat_matches = plat_picked
      ? (i == plat_pick)
      : (!plat_name || strstr(plat_vendor, plat_name) != NULL);
    if (!plat_matches)
      continue;

    // get number of devices in platform
    cl_uint dev_count;
    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          0, NULL, &dev_count));

    // allocate memory, get list of device handles in platform
    cl_device_id *devices =
      (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
    CHECK_SYS_ERROR(!devices, "allocating device array");

    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          dev_count, devices, NULL));

    // {{{ print device menu, if requested
#ifndef CL_HELPER_FORCE_INTERACTIVE
    if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
#endif
    {
      puts("Choose device:");
      for (cl_uint j = 0; j < dev_count; ++j)
      {
        char menu_name[MAX_NAME_LEN];
        CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
              sizeof(menu_name), menu_name, NULL));
        printf("[%u] %s\n", j, menu_name);
      }

      dev_pick = prompt_for_menu_index(dev_count);
      dev_pick_plat = i;
      dev_picked = true;

      // Keep the picked name as well: should the search move on to another
      // platform, devices there are matched by this name.
      CALL_CL_GUARDED(clGetDeviceInfo, (devices[dev_pick], CL_DEVICE_NAME,
            sizeof(dev_sel_buf), dev_sel_buf, NULL));
      dev_name = dev_sel_buf;
    }
    // }}}

    // iterate over devices
    for (cl_uint j = 0; j < dev_count; ++j)
    {
      // get device name
      char this_dev_name[MAX_NAME_LEN];
      CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
            sizeof(this_dev_name), this_dev_name, NULL));

      // does it match?
      bool dev_matches = (dev_picked && dev_pick_plat == i)
        ? (j == dev_pick)
        : (!dev_name || strstr(this_dev_name, dev_name) != NULL);
      if (!dev_matches)
        continue;

      // skip the first 'idx' matches
      if (idx != 0)
      {
        --idx;
        continue;
      }

      cl_platform_id plat = platforms[i];
      cl_device_id dev = devices[j];

      // the handles were copied above, the arrays are no longer needed
      free(devices);
      free(platforms);

      // create a context
      cl_context_properties cps[3] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 };

      cl_int status;
      *ctx = clCreateContext(
          cps, 1, &dev, NULL, NULL, &status);
      CHECK_CL_ERROR(status, "clCreateContext");

      // create a command queue
      cl_command_queue_properties qprops = 0;
      if (enable_profiling)
        qprops |= CL_QUEUE_PROFILING_ENABLE;

      if (queue)
      {
        *queue = clCreateCommandQueue(*ctx, dev, qprops, &status);
        CHECK_CL_ERROR(status, "clCreateCommandQueue");
      }

      return;
    }

    free(devices);
  }

  free(platforms);

  fputs("create_context_on: specified device not found.\n", stderr);
  abort();
}
/* Read the whole file 'filename' into a freshly allocated, NUL-terminated
 * buffer and return it. The caller must free() the result.
 *
 * The file is opened in binary mode so that the size obtained by seeking
 * matches the number of bytes fread() delivers on every platform.
 *
 * This function does not return on failure: any I/O or allocation error
 * prints a message via perror() and aborts the process.
 */
char *read_file(const char *filename)
{
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");
  long end_pos = ftell(f);
  // ftell reports failure as -1, which must not be turned into a huge size
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory (one extra byte for the terminator), slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");

  result[size] = '\0';
  return result;
}
/* Set once the "non-empty build log" hint has been printed, so that it is
 * shown at most once per process. */
static int printed_compiler_output_message = 0;

/* Build the OpenCL source text 'knl' for context 'ctx' and return the kernel
 * named 'knl_name' from it.
 *
 * options  compiler flags, or NULL; an empty string is treated like NULL
 *
 * The build log is written to stderr when the build fails, or when it is
 * non-empty and the environment variable CL_HELPER_PRINT_COMPILER_OUTPUT is
 * set. Otherwise a non-empty log only triggers a one-time hint, which
 * CL_HELPER_NO_COMPILER_OUTPUT_NAG suppresses.
 *
 * The caller must release the returned kernel. This function does not
 * return on failure: any error aborts the process.
 */
cl_kernel kernel_from_string(cl_context ctx,
    char const *knl, char const *knl_name, char const *options)
{
  // create an OpenCL program (may have multiple kernels)
  size_t sizes[] = { strlen(knl) };

  if (options && strlen(options) == 0)
  {
    // reportedly, some implementations dislike empty strings.
    options = NULL;
  }

  cl_int status;
  cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status);
  CHECK_CL_ERROR(status, "clCreateProgramWithSource");

  // build it; the status is checked only after the log has been handled
  status = clBuildProgram(program, 0, NULL, options, NULL, NULL);

  {
    // get build log and print it
    // NOTE(review): room for a single device handle only -- presumably 'ctx'
    // always holds exactly one device; confirm before using multi-device
    // contexts.
    cl_device_id dev;
    CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES,
          sizeof(dev), &dev, NULL));

    size_t log_size;
    CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG,
          0, NULL, &log_size));

    bool do_print = status != CL_SUCCESS;
    if (!do_print && log_size)
    {
      if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT"))
        do_print = true;
      else
      {
        if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG"))
        {
          fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n"
              "*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n"
              "*** NOTE: this may include compiler warnings and other important messages\n"
              "*** about your code.\n"
              "*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n");
          printed_compiler_output_message = 1;
        }
      }
    }

    if (do_print)
    {
      // one extra byte so the text is terminated even if the log is empty
      char *log = (char *) malloc(log_size + 1);
      CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log");

      char devname[MAX_NAME_LEN];
      CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME,
            sizeof(devname), devname, NULL));

      if (log_size > 0)
      {
        CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG,
              log_size, log, NULL));
      }
      log[log_size] = '\0';

      fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n",
          knl_name, devname, log);

      free(log);
    }
  }

  CHECK_CL_ERROR(status, "clBuildProgram");

  // fish the kernel out of the program
  cl_kernel kernel = clCreateKernel(program, knl_name, &status);
  CHECK_CL_ERROR(status, "clCreateKernel");

  CALL_CL_GUARDED(clReleaseProgram, (program));

  return kernel;
}
/* Print a report about 'device' to standard output: its string properties,
 * type, execution capabilities, cache and local memory types, bit-field
 * properties (in hex), numeric properties and maximum work item sizes.
 *
 * A property that cannot be queried is reported on its own line and the
 * report continues; only the final work-item-size query aborts on failure.
 *
 * Numeric properties are all read into one 64-bit variable. Since
 * clGetDeviceInfo stores only as many bytes as the property's own type
 * occupies, that variable is cleared before every query so that a narrower
 * property never shows leftover bytes of the previous, wider one.
 */
void print_device_info(cl_device_id device)
{
  // adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c

#define LONG_PROPS \
  defn(VENDOR_ID), \
  defn(MAX_COMPUTE_UNITS), \
  defn(MAX_WORK_ITEM_DIMENSIONS), \
  defn(MAX_WORK_GROUP_SIZE), \
  defn(PREFERRED_VECTOR_WIDTH_CHAR), \
  defn(PREFERRED_VECTOR_WIDTH_SHORT), \
  defn(PREFERRED_VECTOR_WIDTH_INT), \
  defn(PREFERRED_VECTOR_WIDTH_LONG), \
  defn(PREFERRED_VECTOR_WIDTH_FLOAT), \
  defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \
  defn(MAX_CLOCK_FREQUENCY), \
  defn(ADDRESS_BITS), \
  defn(MAX_MEM_ALLOC_SIZE), \
  defn(IMAGE_SUPPORT), \
  defn(MAX_READ_IMAGE_ARGS), \
  defn(MAX_WRITE_IMAGE_ARGS), \
  defn(IMAGE2D_MAX_WIDTH), \
  defn(IMAGE2D_MAX_HEIGHT), \
  defn(IMAGE3D_MAX_WIDTH), \
  defn(IMAGE3D_MAX_HEIGHT), \
  defn(IMAGE3D_MAX_DEPTH), \
  defn(MAX_SAMPLERS), \
  defn(MAX_PARAMETER_SIZE), \
  defn(MEM_BASE_ADDR_ALIGN), \
  defn(MIN_DATA_TYPE_ALIGN_SIZE), \
  defn(GLOBAL_MEM_CACHELINE_SIZE), \
  defn(GLOBAL_MEM_CACHE_SIZE), \
  defn(GLOBAL_MEM_SIZE), \
  defn(MAX_CONSTANT_BUFFER_SIZE), \
  defn(MAX_CONSTANT_ARGS), \
  defn(LOCAL_MEM_SIZE), \
  defn(ERROR_CORRECTION_SUPPORT), \
  defn(PROFILING_TIMER_RESOLUTION), \
  defn(ENDIAN_LITTLE), \
  defn(AVAILABLE), \
  defn(COMPILER_AVAILABLE),

#define STR_PROPS \
  defn(NAME), \
  defn(VENDOR), \
  defn(PROFILE), \
  defn(VERSION), \
  defn(EXTENSIONS),

#define HEX_PROPS \
  defn(SINGLE_FP_CONFIG), \
  defn(QUEUE_PROPERTIES),

  printf("---------------------------------------------------------------------\n");

  // Each table is terminated by an entry whose name is NULL.
  static struct { cl_device_info param; const char *name; } longProps[] = {
#define defn(X) { CL_DEVICE_##X, #X }
    LONG_PROPS
#undef defn
    { 0, NULL },
  };
  static struct { cl_device_info param; const char *name; } hexProps[] = {
#define defn(X) { CL_DEVICE_##X, #X }
    HEX_PROPS
#undef defn
    { 0, NULL },
  };
  static struct { cl_device_info param; const char *name; } strProps[] = {
#define defn(X) { CL_DEVICE_##X, #X }
    STR_PROPS
#undef defn
    { CL_DRIVER_VERSION, "DRIVER_VERSION" },
    { 0, NULL },
  };

  cl_int status;
  size_t size;
  char buf[65536];
  long long val; /* Avoids unpleasant surprises for some params */
  int ii;

  // string properties
  for (ii = 0; strProps[ii].name != NULL; ii++)
  {
    status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size);
    if (status != CL_SUCCESS)
    {
      printf("Unable to get %s: %s!\n",
          strProps[ii].name, cl_error_to_str(status));
      continue;
    }
    if (size > sizeof buf)
    {
      printf("Large %s (%zu bytes)! Truncating to %zu!\n",
          strProps[ii].name, size, sizeof buf);
    }
    printf("%s: %s\n",
        strProps[ii].name, buf);
  }
  printf("\n");

  // device type: print a word per known bit, then whatever is left over
  val = 0;
  status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL);
  if (status == CL_SUCCESS)
  {
    printf("Type: ");
    if (val & CL_DEVICE_TYPE_DEFAULT)
    {
      val &= ~CL_DEVICE_TYPE_DEFAULT;
      printf("Default ");
    }
    if (val & CL_DEVICE_TYPE_CPU)
    {
      val &= ~CL_DEVICE_TYPE_CPU;
      printf("CPU ");
    }
    if (val & CL_DEVICE_TYPE_GPU)
    {
      val &= ~CL_DEVICE_TYPE_GPU;
      printf("GPU ");
    }
    if (val & CL_DEVICE_TYPE_ACCELERATOR)
    {
      val &= ~CL_DEVICE_TYPE_ACCELERATOR;
      printf("Accelerator ");
    }
    if (val != 0) {
      printf("Unknown (0x%llx) ", val);
    }
    printf("\n");
  }
  else
  {
    printf("Unable to get TYPE: %s!\n",
        cl_error_to_str(status));
  }

  // execution capabilities, handled like the device type
  val = 0;
  status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES,
      sizeof val, &val, NULL);
  if (status == CL_SUCCESS)
  {
    printf("EXECUTION_CAPABILITIES: ");
    if (val & CL_EXEC_KERNEL)
    {
      val &= ~CL_EXEC_KERNEL;
      printf("Kernel ");
    }
    if (val & CL_EXEC_NATIVE_KERNEL)
    {
      val &= ~CL_EXEC_NATIVE_KERNEL;
      printf("Native ");
    }
    if (val)
      printf("Unknown (0x%llx) ", val);
    printf("\n");
  }
  else
  {
    printf("Unable to get EXECUTION_CAPABILITIES: %s!\n",
        cl_error_to_str(status));
  }

  // global memory cache type; the value is used as an index into the names
  val = 0;
  status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
      sizeof val, &val, NULL);
  if (status == CL_SUCCESS)
  {
    static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" };
    static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0];
    printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n",
        (val >= 0 && val < numTypes) ? cacheTypes[val] : "???", val);
  }
  else
  {
    printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n",
        cl_error_to_str(status));
  }

  // local memory type; the value is used as an index into the names
  val = 0;
  status = clGetDeviceInfo(device,
      CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL);
  if (status == CL_SUCCESS)
  {
    static const char *lmemTypes[] = { "???", "Local", "Global" };
    static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0];
    printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n",
        (val >= 0 && val < numTypes) ? lmemTypes[val] : "???", val);
  }
  else
  {
    printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n",
        cl_error_to_str(status));
  }

  // bit-field properties, printed in hex
  for (ii = 0; hexProps[ii].name != NULL; ii++)
  {
    val = 0;
    status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size);
    if (status != CL_SUCCESS)
    {
      printf("Unable to get %s: %s!\n",
          hexProps[ii].name, cl_error_to_str(status));
      continue;
    }
    if (size > sizeof val)
    {
      printf("Large %s (%zu bytes)! Truncating to %zu!\n",
          hexProps[ii].name, size, sizeof val);
    }
    printf("%s: 0x%llx\n", hexProps[ii].name, val);
  }
  printf("\n");

  // numeric properties, printed in decimal
  for (ii = 0; longProps[ii].name != NULL; ii++)
  {
    val = 0;
    status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size);
    if (status != CL_SUCCESS)
    {
      printf("Unable to get %s: %s!\n",
          longProps[ii].name, cl_error_to_str(status));
      continue;
    }
    if (size > sizeof val)
    {
      printf("Large %s (%zu bytes)! Truncating to %zu!\n",
          longProps[ii].name, size, sizeof val);
    }
    printf("%s: %lld\n", longProps[ii].name, val);
  }

  {
    // ask for the size first, then fetch that many bytes
    size_t sizes_bytes;
    CALL_CL_GUARDED(clGetDeviceInfo,
        (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &sizes_bytes));

    const size_t ndims = sizes_bytes/sizeof(size_t);

    printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie
    if (ndims > 0) // a zero-length VLA would be undefined
    {
      size_t res_vec[ndims]; // C99 VLA yay!

      CALL_CL_GUARDED(clGetDeviceInfo,
          (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizes_bytes, res_vec, &sizes_bytes));

      for (size_t i = 0; i < sizes_bytes/sizeof(size_t); ++i)
        printf("%zu ", res_vec[i]);
    }
    printf("\n");
  }
  printf("---------------------------------------------------------------------\n");
}
/* Look up the device that 'queue' is attached to and print its report via
 * print_device_info(). Aborts if the queue cannot be queried.
 */
void print_device_info_from_queue(cl_command_queue queue)
{
  cl_device_id queue_device;

  CALL_CL_GUARDED(clGetCommandQueueInfo,
      (queue, CL_QUEUE_DEVICE, sizeof(queue_device), &queue_device, NULL));

  print_device_info(queue_device);
}

250
cl-helper.h

@ -0,0 +1,250 @@
/*
* Copyright (c) 2010, 2012 Andreas Kloeckner
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef NYUHPC_CL_HELPER
#define NYUHPC_CL_HELPER
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
/* An error check macro for OpenCL.
 *
 * Usage:
 * CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name");
 *
 * It will abort with a message if an error occurred.
 *
 * The expansion is a single statement (note the semicolon at the call site),
 * so it can be used as the body of an if/else, and STATUS_CODE is evaluated
 * exactly once.
 */
#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \
  do \
  { \
    const cl_int check_cl_error_status_ = (STATUS_CODE); \
    if (check_cl_error_status_ != CL_SUCCESS) \
    { \
      fprintf(stderr, \
          "*** '%s' in '%s' on line %d failed with error '%s'.\n", \
          (WHAT), __FILE__, __LINE__, \
          cl_error_to_str(check_cl_error_status_)); \
      abort(); \
    } \
  } while (0)
/* A more automated error check macro for OpenCL, for use with clXxxx
 * functions that return status codes. (Not all of them do, notably
 * clCreateXxx do not.)
 *
 * Usage:
 * CALL_CL_GUARDED(clFunction, (arg1, arg2));
 *
 * Note the slightly strange comma between the function name and the
 * argument list.
 *
 * The call is made once and its status is handed to CHECK_CL_ERROR, which
 * aborts on failure. The expansion is a single statement (note the semicolon
 * at the call site), so it can be used as the body of an if/else.
 */
#define CALL_CL_GUARDED(NAME, ARGLIST) \
  do \
  { \
    cl_int status_code; \
    status_code = NAME ARGLIST; \
    CHECK_CL_ERROR(status_code, #NAME); \
  } while (0)
/* An error check macro for Unix system functions. If "COND" is true, then the
 * last system error ("errno") is printed along with MSG, which is supposed to
 * be a string describing what you were doing, and the program is aborted.
 *
 * Example:
 * CHECK_SYS_ERROR(dave != 0, "opening hatch");
 *
 * The expansion is a single statement (note the semicolon at the call site),
 * so it can be used as the body of an if/else.
 */
#define CHECK_SYS_ERROR(COND, MSG) \
  do \
  { \
    if (COND) \
    { \
      perror(MSG); \
      abort(); \
    } \
  } while (0)
/* Return a short, human-readable description of the OpenCL status code 'e'.
 * The result is a string literal and must not be freed; codes the helper
 * does not know yield "invalid/unknown error code".
 */
const char *cl_error_to_str(cl_int e);
/* Print a list of available OpenCL platforms and devices
 * to standard output.
 *
 * Takes no arguments; declared with (void) so that the compiler rejects
 * calls that pass any.
 */
void print_platforms_devices(void);
/* Create an OpenCL context and a matching command queue on a platform from a
 * vendor whose name contains 'plat_name' on a device whose name contains
 * 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no
 * preference in the matter.
 *
 * If multiple devices match both 'plat_name' and 'dev_name', then 'idx'
 * prescribes the number of the device that should be chosen.
 *
 * You may also use the special value CHOOSE_INTERACTIVELY (declared below)
 * for 'plat_name' and/or 'dev_name' to offer the user a choice. You should
 * use this value for code you turn in.
 *
 * The new context is stored in '*ctx'. The new command queue is stored in
 * '*queue'; 'queue' may be NULL, in which case no queue is created. A
 * non-zero 'enable_profiling' requests a queue with profiling enabled.
 *
 * This function always succeeds. (If an error occurs, the program
 * is aborted.)
 *
 * You can force interactive querying by defining the
 * CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c.
 * You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE
 * compiler option.
 */
extern const char *CHOOSE_INTERACTIVELY;
void create_context_on(const char *plat_name, const char*dev_name, cl_uint
idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling);
/* Read the entire contents of file 'filename'.
 * Return them as a new NUL-terminated string. You must free the string when
 * you're done with it.
 *
 * This function always succeeds. (If an error occurs, the program
 * is aborted.)
 */
char *read_file(const char *filename);
/* Create a new OpenCL kernel from the code in the string 'knl', built for
 * the context 'ctx'. 'knl_name' is the name of the kernel function, and
 * 'options', if not NULL, is a string containing compiler flags.
 *
 * You must release the resulting kernel when you're done
 * with it.
 *
 * This function always succeeds. (If an error occurs, the program
 * is aborted.)
 */
cl_kernel kernel_from_string(cl_context ctx,
char const *knl, char const *knl_name, char const *options);
/* Print information about a device to standard output. The device is given
 * either directly by its device_id, or indirectly as the device that the
 * command queue 'queue' belongs to.
 */
void print_device_info(cl_device_id device);
void print_device_info_from_queue(cl_command_queue queue);
/* Set argument 0 of kernel 'knl' from the lvalue 'arg0'; aborts on failure.
 * Expands to a single statement; terminate the call with a semicolon. */
#define SET_1_KERNEL_ARG(knl, arg0) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
  } while (0)
/* Set arguments 0..1 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
  } while (0)
/* Set arguments 0..2 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
  } while (0)
/* Set arguments 0..3 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
  } while (0)
/* Set arguments 0..4 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
  } while (0)
/* Set arguments 0..5 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
  } while (0)
/* Set arguments 0..6 of kernel 'knl' from the given lvalues; aborts on
 * failure. Expands to a single statement, so all calls stay together when
 * the macro is the body of an if; terminate the call with a semicolon. */
#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
  } while (0)
/*
 * Set kernel arguments 0..7 of knl from arg0..arg7, issuing one
 * clSetKernelArg call per argument through CALL_CL_GUARDED.
 *
 * Each argN must be an lvalue: its size and its address are what is passed.
 *
 * The calls are wrapped in do { } while (0) so the expansion is a single
 * statement; otherwise an unbraced `if (c) SET_8_KERNEL_ARGS(...);` would
 * guard only the first call. The trailing semicolon is kept so that call
 * sites written without their own semicolon keep compiling.
 */
#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
  do { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
  } while (0);
/*
 * Set kernel arguments 0..8 of knl from arg0..arg8, issuing one
 * clSetKernelArg call per argument through CALL_CL_GUARDED.
 *
 * Each argN must be an lvalue: its size and its address are what is passed.
 *
 * The calls are wrapped in do { } while (0) so the expansion is a single
 * statement; otherwise an unbraced `if (c) SET_9_KERNEL_ARGS(...);` would
 * guard only the first call. The trailing semicolon is kept so that call
 * sites written without their own semicolon keep compiling.
 */
#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
  do { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
  } while (0);
/*
 * Set kernel arguments 0..9 of knl from arg0..arg9, issuing one
 * clSetKernelArg call per argument through CALL_CL_GUARDED.
 *
 * Each argN must be an lvalue: its size and its address are what is passed.
 *
 * The calls are wrapped in do { } while (0) so the expansion is a single
 * statement; otherwise an unbraced `if (c) SET_10_KERNEL_ARGS(...);` would
 * guard only the first call. The trailing semicolon is kept so that call
 * sites written without their own semicolon keep compiling.
 */
#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \
  do { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
  } while (0);
/*
 * Set kernel arguments 0..10 of knl from arg0..arg10, issuing one
 * clSetKernelArg call per argument through CALL_CL_GUARDED.
 *
 * Each argN must be an lvalue: its size and its address are what is passed.
 *
 * The calls are wrapped in do { } while (0) so the expansion is a single
 * statement; otherwise an unbraced `if (c) SET_11_KERNEL_ARGS(...);` would
 * guard only the first call. The trailing semicolon is kept so that call
 * sites written without their own semicolon keep compiling.
 */
#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \
  do { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 10, sizeof(arg10), &(arg10))); \
  } while (0);
/*
 * Set kernel arguments 0..11 of knl from arg0..arg11, issuing one
 * clSetKernelArg call per argument through CALL_CL_GUARDED.
 *
 * Each argN must be an lvalue: its size and its address are what is passed.
 *
 * The calls are wrapped in do { } while (0) so the expansion is a single
 * statement; otherwise an unbraced `if (c) SET_12_KERNEL_ARGS(...);` would
 * guard only the first call. The trailing semicolon is kept so that call
 * sites written without their own semicolon keep compiling.
 */
#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \
  do { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 10, sizeof(arg10), &(arg10))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 11, sizeof(arg11), &(arg11))); \
  } while (0);
#endif

7
print-devices.c

@ -0,0 +1,7 @@
#include "cl-helper.h"
/*
 * Program entry point: hands off to print_platforms_devices() and then
 * exits with status 0.
 */
int main(int argc, char **argv)
{
  /* The command line is accepted but not consulted. */
  (void) argc;
  (void) argv;

  print_platforms_devices();

  return 0;
}

7
set-governor

@ -0,0 +1,7 @@
#! /bin/bash
# Apply one cpufreq governor to every CPU listed in /proc/cpuinfo.
#
# Usage: set-governor GOVERNOR
#
# For each CPU index the script echoes the index and then runs
# `cpufreq-set -g GOVERNOR -c INDEX`.

# Refuse to run without a governor name instead of passing an empty -g value.
if [ $# -lt 1 ]; then
  echo "usage: $0 GOVERNOR" >&2
  exit 1
fi

# Count only the lines that start with "processor"; a plain substring match
# would also count any other line that happens to contain the word.
NCPUS=$(grep -c '^processor' /proc/cpuinfo)

for i in $(seq 0 $((NCPUS-1)) ); do
  echo "$i"
  # Quote the governor so an argument with spaces or globs is passed verbatim.
  cpufreq-set -g "$1" -c "$i"
done

2
show-clock-freq

@ -0,0 +1,2 @@
#! /bin/sh
# Dump the contents of the cpufreq/scaling_cur_freq sysfs file of every CPU
# directory matched by the glob below.
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq

54
timing.h

@ -0,0 +1,54 @@
#ifdef __APPLE__
#include <sys/time.h>
typedef struct timeval timestamp_type;
/*
 * Fill *t with the current time as reported by gettimeofday().
 * No timezone information is requested.
 */
static void get_timestamp(timestamp_type *t)
{
  /* The status returned by gettimeofday() is deliberately not inspected. */
  (void) gettimeofday(t, NULL);
}
/*
 * Return end - start in seconds as a double.
 *
 * start is received by value and adjusted locally so that the microsecond
 * difference ends up non-negative before the final subtraction; the caller's
 * timestamps are not modified.
 */
static double timestamp_diff_in_seconds(timestamp_type start,
                                        timestamp_type end)
{
  /* end has fewer microseconds than start: move whole seconds from the
   * microsecond field of start into its second field. */
  if (end.tv_usec < start.tv_usec) {
    int whole_secs = (start.tv_usec - end.tv_usec) / 1000000 + 1;
    start.tv_sec += whole_secs;
    start.tv_usec -= 1000000 * whole_secs;
  }

  /* Microsecond gap exceeds one second: shift the surplus whole seconds
   * the other way. */
  if (end.tv_usec - start.tv_usec > 1000000) {
    int whole_secs = (end.tv_usec - start.tv_usec) / 1000000;
    start.tv_sec -= whole_secs;
    start.tv_usec += 1000000 * whole_secs;
  }

  return end.tv_sec - start.tv_sec
       + (end.tv_usec - start.tv_usec) * 1e-6;
}
#else
#include <time.h>
typedef struct timespec timestamp_type;
/*
 * Fill *t with the current reading of a clock suitable for interval timing.
 *
 * CLOCK_MONOTONIC is used when the platform defines it, so that differences
 * computed by timestamp_diff_in_seconds() are not distorted when the wall
 * clock is stepped (NTP, manual changes) between two readings.  Platforms
 * without it fall back to CLOCK_REALTIME.
 *
 * The timestamp is only meaningful relative to another one taken with this
 * function; it is not a calendar time.
 */
static void get_timestamp(timestamp_type *t)
{
#ifdef CLOCK_MONOTONIC
  clock_gettime(CLOCK_MONOTONIC, t);
#else
  clock_gettime(CLOCK_REALTIME, t);
#endif
}
/*
 * Return end - start in seconds as a double.
 *
 * The second and nanosecond fields are subtracted separately; when the
 * nanosecond difference is negative, one second is borrowed so that the
 * nanosecond part is non-negative before conversion to double.
 */
static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end)
{
  struct timespec delta;

  delta.tv_sec = end.tv_sec - start.tv_sec;
  delta.tv_nsec = end.tv_nsec - start.tv_nsec;

  if (delta.tv_nsec < 0) {
    /* Borrow one second (1e9 ns) from the seconds field. */
    delta.tv_sec -= 1;
    delta.tv_nsec += 1000000000;
  }

  return delta.tv_sec + 1e-9*delta.tv_nsec;
}
#endif

12
vec-add-soln.cl

@ -0,0 +1,12 @@
#pragma OPENCL EXTENSION cl_khr_fp64: enable
/*
 * Element-wise addition of two float buffers: each work-item whose global id
 * in dimension 0 is below n writes c[gid] = a[gid] + b[gid].
 *
 *   a, b  input buffers (read only)
 *   c     output buffer
 *   n     number of elements to process; work-items with gid >= n do nothing
 */
__kernel void sum(
    __global const float *a,
    __global const float *b,
    __global float *c,
    long n)
{
  /* get_global_id() returns size_t; hold it in a long (the type of n) rather
   * than an int so large global ids are not truncated before the bounds
   * check and the indexing. */
  long gid = (long) get_global_id(0);

  if (gid < n) {
    c[gid] = a[gid] + b[gid];
  }
}
Loading…
Cancel
Save