commit
c3bddbc632
11 changed files with 1261 additions and 0 deletions
-
6.gitignore
-
12Makefile
-
39README.md
-
131cl-demo.c
-
741cl-helper.c
-
250cl-helper.h
-
7print-devices.c
-
7set-governor
-
2show-clock-freq
-
54timing.h
-
12vec-add-soln.cl
@ -0,0 +1,6 @@ |
|||
.*.sw[op] |
|||
*~ |
|||
a.out |
|||
print-devices |
|||
cl-demo |
|||
*.o |
|||
@ -0,0 +1,12 @@ |
|||
EXECUTABLES = cl-demo print-devices |
|||
|
|||
all: $(EXECUTABLES) |
|||
|
|||
print-devices: print-devices.c cl-helper.c |
|||
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL |
|||
|
|||
cl-demo: cl-demo.c cl-helper.c |
|||
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL |
|||
|
|||
clean: |
|||
@rm -f $(EXECUTABLES) *.o |
|||
@ -0,0 +1,39 @@ |
|||
# OpenCL Howto |
|||
|
|||
Code snippets taken from |
|||
[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo) |
|||
|
|||
## Description |
|||
|
|||
This is just some more playing around with OpenCL and try to learn a bit about |
|||
it. |
|||
|
|||
## Requirements |
|||
|
|||
Some OpenCL capable hardware and the according OpenCL library exposing the |
|||
OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT |
|||
Integrated Graphics Controller (rev 09)) with the |
|||
[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) |
|||
open source library. |
|||
|
|||
## License |
|||
|
|||
MIT License |
|||
|
|||
> Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
> of this software and associated documentation files (the "Software"), to |
|||
> deal in the Software without restriction, including without limitation the |
|||
> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
|||
> sell copies of the Software, and to permit persons to whom the Software is |
|||
> furnished to do so, subject to the following conditions: |
|||
> |
|||
> The above copyright notice and this permission notice shall be included in |
|||
> all copies or substantial portions of the Software. |
|||
> |
|||
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|||
> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|||
> IN THE SOFTWARE. |
|||
@ -0,0 +1,131 @@ |
|||
#include "timing.h" |
|||
#include "cl-helper.h" |
|||
|
|||
|
|||
|
|||
|
|||
int main(int argc, char **argv) |
|||
{ |
|||
if (argc != 3) |
|||
{ |
|||
fprintf(stderr, "need two arguments!\n"); |
|||
abort(); |
|||
} |
|||
|
|||
const cl_long n = atol(argv[1]); |
|||
const int ntrips = atoi(argv[2]); |
|||
|
|||
cl_context ctx; |
|||
cl_command_queue queue; |
|||
create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); |
|||
|
|||
print_device_info_from_queue(queue); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// load kernels |
|||
// -------------------------------------------------------------------------- |
|||
char *knl_text = read_file("vec-add-soln.cl"); |
|||
cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); |
|||
free(knl_text); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// allocate and initialize CPU memory |
|||
// -------------------------------------------------------------------------- |
|||
float *a = (float *) malloc(sizeof(float) * n); |
|||
if (!a) { perror("alloc x"); abort(); } |
|||
float *b = (float *) malloc(sizeof(float) * n); |
|||
if (!b) { perror("alloc y"); abort(); } |
|||
float *c = (float *) malloc(sizeof(float) * n); |
|||
if (!c) { perror("alloc z"); abort(); } |
|||
|
|||
for (size_t i = 0; i < n; ++i) |
|||
{ |
|||
a[i] = i; |
|||
b[i] = 2*i; |
|||
} |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// allocate device memory |
|||
// -------------------------------------------------------------------------- |
|||
cl_int status; |
|||
cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
|||
sizeof(float) * n, 0, &status); |
|||
CHECK_CL_ERROR(status, "clCreateBuffer"); |
|||
|
|||
cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
|||
sizeof(float) * n, 0, &status); |
|||
CHECK_CL_ERROR(status, "clCreateBuffer"); |
|||
|
|||
cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
|||
sizeof(float) * n, 0, &status); |
|||
CHECK_CL_ERROR(status, "clCreateBuffer"); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// transfer to device |
|||
// -------------------------------------------------------------------------- |
|||
CALL_CL_GUARDED(clEnqueueWriteBuffer, ( |
|||
queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, |
|||
n * sizeof(float), a, |
|||
0, NULL, NULL)); |
|||
|
|||
CALL_CL_GUARDED(clEnqueueWriteBuffer, ( |
|||
queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, |
|||
n * sizeof(float), b, |
|||
0, NULL, NULL)); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// run code on device |
|||
// -------------------------------------------------------------------------- |
|||
|
|||
CALL_CL_GUARDED(clFinish, (queue)); |
|||
|
|||
timestamp_type time1, time2; |
|||
get_timestamp(&time1); |
|||
|
|||
for (int trip = 0; trip < ntrips; ++trip) |
|||
{ |
|||
SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n); |
|||
size_t ldim[] = { 32 }; |
|||
size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] }; |
|||
CALL_CL_GUARDED(clEnqueueNDRangeKernel, |
|||
(queue, knl, |
|||
/*dimensions*/ 1, NULL, gdim, ldim, |
|||
0, NULL, NULL)); |
|||
} |
|||
|
|||
CALL_CL_GUARDED(clFinish, (queue)); |
|||
|
|||
get_timestamp(&time2); |
|||
double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips; |
|||
printf("%f s\n", elapsed); |
|||
printf("%f GB/s\n", |
|||
3*n*sizeof(float)/1e9/elapsed); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// transfer back & check |
|||
// -------------------------------------------------------------------------- |
|||
CALL_CL_GUARDED(clEnqueueReadBuffer, ( |
|||
queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, |
|||
n * sizeof(float), c, |
|||
0, NULL, NULL)); |
|||
|
|||
for (size_t i = 0; i < n; ++i) |
|||
if (c[i] != 3*i) |
|||
{ |
|||
printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i); |
|||
abort(); |
|||
} |
|||
puts("GOOD"); |
|||
|
|||
// -------------------------------------------------------------------------- |
|||
// clean up |
|||
// -------------------------------------------------------------------------- |
|||
CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); |
|||
CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); |
|||
CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); |
|||
CALL_CL_GUARDED(clReleaseKernel, (knl)); |
|||
CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); |
|||
CALL_CL_GUARDED(clReleaseContext, (ctx)); |
|||
|
|||
return 0; |
|||
} |
|||
@ -0,0 +1,741 @@ |
|||
/* |
|||
* Copyright (c) 2010 Andreas Kloeckner |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in |
|||
* all copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|||
* THE SOFTWARE. |
|||
*/ |
|||
|
|||
|
|||
|
|||
|
|||
#include "cl-helper.h" |
|||
#include <string.h> |
|||
#include <stdbool.h> |
|||
|
|||
|
|||
|
|||
|
|||
#define MAX_NAME_LEN 1000 |
|||
|
|||
|
|||
|
|||
|
|||
const char *cl_error_to_str(cl_int e) |
|||
{ |
|||
switch (e) |
|||
{ |
|||
case CL_SUCCESS: return "success"; |
|||
case CL_DEVICE_NOT_FOUND: return "device not found"; |
|||
case CL_DEVICE_NOT_AVAILABLE: return "device not available"; |
|||
#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) |
|||
case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available"; |
|||
#endif |
|||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure"; |
|||
case CL_OUT_OF_RESOURCES: return "out of resources"; |
|||
case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; |
|||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available"; |
|||
case CL_MEM_COPY_OVERLAP: return "mem copy overlap"; |
|||
case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch"; |
|||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported"; |
|||
case CL_BUILD_PROGRAM_FAILURE: return "build program failure"; |
|||
case CL_MAP_FAILURE: return "map failure"; |
|||
|
|||
case CL_INVALID_VALUE: return "invalid value"; |
|||
case CL_INVALID_DEVICE_TYPE: return "invalid device type"; |
|||
case CL_INVALID_PLATFORM: return "invalid platform"; |
|||
case CL_INVALID_DEVICE: return "invalid device"; |
|||
case CL_INVALID_CONTEXT: return "invalid context"; |
|||
case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties"; |
|||
case CL_INVALID_COMMAND_QUEUE: return "invalid command queue"; |
|||
case CL_INVALID_HOST_PTR: return "invalid host ptr"; |
|||
case CL_INVALID_MEM_OBJECT: return "invalid mem object"; |
|||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor"; |
|||
case CL_INVALID_IMAGE_SIZE: return "invalid image size"; |
|||
case CL_INVALID_SAMPLER: return "invalid sampler"; |
|||
case CL_INVALID_BINARY: return "invalid binary"; |
|||
case CL_INVALID_BUILD_OPTIONS: return "invalid build options"; |
|||
case CL_INVALID_PROGRAM: return "invalid program"; |
|||
case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable"; |
|||
case CL_INVALID_KERNEL_NAME: return "invalid kernel name"; |
|||
case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition"; |
|||
case CL_INVALID_KERNEL: return "invalid kernel"; |
|||
case CL_INVALID_ARG_INDEX: return "invalid arg index"; |
|||
case CL_INVALID_ARG_VALUE: return "invalid arg value"; |
|||
case CL_INVALID_ARG_SIZE: return "invalid arg size"; |
|||
case CL_INVALID_KERNEL_ARGS: return "invalid kernel args"; |
|||
case CL_INVALID_WORK_DIMENSION: return "invalid work dimension"; |
|||
case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; |
|||
case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size"; |
|||
case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset"; |
|||
case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list"; |
|||
case CL_INVALID_EVENT: return "invalid event"; |
|||
case CL_INVALID_OPERATION: return "invalid operation"; |
|||
case CL_INVALID_GL_OBJECT: return "invalid gl object"; |
|||
case CL_INVALID_BUFFER_SIZE: return "invalid buffer size"; |
|||
case CL_INVALID_MIP_LEVEL: return "invalid mip level"; |
|||
|
|||
#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) |
|||
case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number"; |
|||
#endif |
|||
|
|||
#ifdef CL_VERSION_1_1 |
|||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset"; |
|||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list"; |
|||
case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size"; |
|||
#endif |
|||
|
|||
default: return "invalid/unknown error code"; |
|||
} |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
void print_platforms_devices() |
|||
{ |
|||
// get number of platforms |
|||
cl_uint plat_count; |
|||
CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); |
|||
|
|||
// allocate memory, get list of platforms |
|||
cl_platform_id *platforms = |
|||
(cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); |
|||
CHECK_SYS_ERROR(!platforms, "allocating platform array"); |
|||
|
|||
CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); |
|||
|
|||
// iterate over platforms |
|||
for (cl_uint i = 0; i < plat_count; ++i) |
|||
{ |
|||
// get platform vendor name |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
|||
sizeof(buf), buf, NULL)); |
|||
printf("platform %d: vendor '%s'\n", i, buf); |
|||
|
|||
// get number of devices in platform |
|||
cl_uint dev_count; |
|||
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
|||
0, NULL, &dev_count)); |
|||
|
|||
cl_device_id *devices = |
|||
(cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); |
|||
CHECK_SYS_ERROR(!devices, "allocating device array"); |
|||
|
|||
// get list of devices in platform |
|||
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
|||
dev_count, devices, NULL)); |
|||
|
|||
// iterate over devices |
|||
for (cl_uint j = 0; j < dev_count; ++j) |
|||
{ |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
|||
sizeof(buf), buf, NULL)); |
|||
printf(" device %d: '%s'\n", j, buf); |
|||
} |
|||
|
|||
free(devices); |
|||
} |
|||
|
|||
free(platforms); |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
/* Read a line from stdin. C makes things simple. :) |
|||
* From http://stackoverflow.com/a/314422/1148634 |
|||
*/ |
|||
char *read_a_line(void) |
|||
{ |
|||
char * line = (char *) malloc(MAX_NAME_LEN), * linep = line; |
|||
size_t lenmax = MAX_NAME_LEN, len = lenmax; |
|||
int c; |
|||
|
|||
if(line == NULL) |
|||
return NULL; |
|||
|
|||
for(;;) |
|||
{ |
|||
c = fgetc(stdin); |
|||
if(c == EOF) |
|||
break; |
|||
|
|||
if(--len == 0) |
|||
{ |
|||
char *linen = (char *) realloc(linep, lenmax *= 2); |
|||
len = lenmax; |
|||
|
|||
if(linen == NULL) |
|||
{ |
|||
free(linep); |
|||
return NULL; |
|||
} |
|||
line = linen + (line - linep); |
|||
linep = linen; |
|||
} |
|||
|
|||
if((*line++ = c) == '\n') |
|||
break; |
|||
} |
|||
*line = '\0'; |
|||
return linep; |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
const char *CHOOSE_INTERACTIVELY = "INTERACTIVE"; |
|||
|
|||
|
|||
#define MIN(a,b) (((a)<(b))?(a):(b)) |
|||
#define MAX(a,b) (((a)>(b))?(a):(b)) |
|||
|
|||
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, |
|||
cl_context *ctx, cl_command_queue *queue, int enable_profiling) |
|||
{ |
|||
char dev_sel_buf[MAX_NAME_LEN]; |
|||
char platform_sel_buf[MAX_NAME_LEN]; |
|||
|
|||
// get number of platforms |
|||
cl_uint plat_count; |
|||
CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); |
|||
|
|||
// allocate memory, get list of platform handles |
|||
cl_platform_id *platforms = |
|||
(cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); |
|||
CHECK_SYS_ERROR(!platforms, "allocating platform array"); |
|||
CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); |
|||
|
|||
// print menu, if requested |
|||
#ifndef CL_HELPER_FORCE_INTERACTIVE |
|||
if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer |
|||
#endif |
|||
{ |
|||
puts("Choose platform:"); |
|||
for (cl_uint i = 0; i < plat_count; ++i) |
|||
{ |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
|||
sizeof(buf), buf, NULL)); |
|||
printf("[%d] %s\n", i, buf); |
|||
} |
|||
|
|||
printf("Enter choice: "); |
|||
fflush(stdout); |
|||
|
|||
char *sel = read_a_line(); |
|||
if (!sel) |
|||
{ |
|||
fprintf(stderr, "error reading line from stdin"); |
|||
abort(); |
|||
} |
|||
|
|||
int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); |
|||
free(sel); |
|||
|
|||
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, |
|||
sizeof(platform_sel_buf), platform_sel_buf, NULL)); |
|||
plat_name = platform_sel_buf; |
|||
} |
|||
|
|||
// iterate over platforms |
|||
for (cl_uint i = 0; i < plat_count; ++i) |
|||
{ |
|||
// get platform name |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
|||
sizeof(buf), buf, NULL)); |
|||
|
|||
// does it match? |
|||
if (!plat_name || strstr(buf, plat_name)) |
|||
{ |
|||
// get number of devices in platform |
|||
cl_uint dev_count; |
|||
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
|||
0, NULL, &dev_count)); |
|||
|
|||
// allocate memory, get list of device handles in platform |
|||
cl_device_id *devices = |
|||
(cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); |
|||
CHECK_SYS_ERROR(!devices, "allocating device array"); |
|||
|
|||
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
|||
dev_count, devices, NULL)); |
|||
|
|||
// {{{ print device menu, if requested |
|||
#ifndef CL_HELPER_FORCE_INTERACTIVE |
|||
if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer |
|||
#endif |
|||
{ |
|||
puts("Choose device:"); |
|||
for (cl_uint j = 0; j < dev_count; ++j) |
|||
{ |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
|||
sizeof(buf), buf, NULL)); |
|||
printf("[%d] %s\n", j, buf); |
|||
} |
|||
|
|||
printf("Enter choice: "); |
|||
fflush(stdout); |
|||
|
|||
char *sel = read_a_line(); |
|||
if (!sel) |
|||
{ |
|||
fprintf(stderr, "error reading line from stdin"); |
|||
abort(); |
|||
} |
|||
|
|||
int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); |
|||
free(sel); |
|||
|
|||
CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, |
|||
sizeof(dev_sel_buf), dev_sel_buf, NULL)); |
|||
dev_name = dev_sel_buf; |
|||
} |
|||
|
|||
// }}} |
|||
|
|||
// iterate over devices |
|||
for (cl_uint j = 0; j < dev_count; ++j) |
|||
{ |
|||
// get device name |
|||
char buf[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
|||
sizeof(buf), buf, NULL)); |
|||
|
|||
// does it match? |
|||
if (!dev_name || strstr(buf, dev_name)) |
|||
{ |
|||
if (idx == 0) |
|||
{ |
|||
cl_platform_id plat = platforms[i]; |
|||
cl_device_id dev = devices[j]; |
|||
|
|||
free(devices); |
|||
free(platforms); |
|||
|
|||
// create a context |
|||
cl_context_properties cps[3] = { |
|||
CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; |
|||
|
|||
cl_int status; |
|||
*ctx = clCreateContext( |
|||
cps, 1, &dev, NULL, NULL, &status); |
|||
CHECK_CL_ERROR(status, "clCreateContext"); |
|||
|
|||
// create a command queue |
|||
cl_command_queue_properties qprops = 0; |
|||
if (enable_profiling) |
|||
qprops |= CL_QUEUE_PROFILING_ENABLE; |
|||
|
|||
if (queue) |
|||
{ |
|||
*queue = clCreateCommandQueue(*ctx, dev, qprops, &status); |
|||
CHECK_CL_ERROR(status, "clCreateCommandQueue"); |
|||
} |
|||
|
|||
return; |
|||
} |
|||
else |
|||
--idx; |
|||
} |
|||
} |
|||
|
|||
free(devices); |
|||
} |
|||
} |
|||
|
|||
free(platforms); |
|||
|
|||
fputs("create_context_on: specified device not found.\n", stderr); |
|||
abort(); |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
char *read_file(const char *filename) |
|||
{ |
|||
FILE *f = fopen(filename, "r"); |
|||
CHECK_SYS_ERROR(!f, "read_file: opening file"); |
|||
|
|||
// figure out file size |
|||
CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end"); |
|||
size_t size = ftell(f); |
|||
|
|||
CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0, |
|||
"read_file: seeking to start"); |
|||
|
|||
// allocate memory, slurp in entire file |
|||
char *result = (char *) malloc(size+1); |
|||
CHECK_SYS_ERROR(!result, "read_file: allocating file contents"); |
|||
CHECK_SYS_ERROR(fread(result, 1, size, f) < size, |
|||
"read_file: reading file contents"); |
|||
|
|||
// close, return |
|||
CHECK_SYS_ERROR(fclose(f), "read_file: closing file"); |
|||
result[size] = '\0'; |
|||
|
|||
return result; |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
static int printed_compiler_output_message = 0; |
|||
|
|||
cl_kernel kernel_from_string(cl_context ctx, |
|||
char const *knl, char const *knl_name, char const *options) |
|||
{ |
|||
// create an OpenCL program (may have multiple kernels) |
|||
size_t sizes[] = { strlen(knl) }; |
|||
|
|||
if (options && strlen(options) == 0) |
|||
{ |
|||
// reportedly, some implementations dislike empty strings. |
|||
options = NULL; |
|||
} |
|||
|
|||
cl_int status; |
|||
cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); |
|||
CHECK_CL_ERROR(status, "clCreateProgramWithSource"); |
|||
|
|||
// build it |
|||
status = clBuildProgram(program, 0, NULL, options, NULL, NULL); |
|||
|
|||
{ |
|||
// get build log and print it |
|||
|
|||
cl_device_id dev; |
|||
CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, |
|||
sizeof(dev), &dev, NULL)); |
|||
|
|||
size_t log_size; |
|||
CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, |
|||
0, NULL, &log_size)); |
|||
|
|||
bool do_print = status != CL_SUCCESS; |
|||
if (!do_print && log_size) |
|||
{ |
|||
if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT")) |
|||
do_print = true; |
|||
else |
|||
{ |
|||
if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG")) |
|||
{ |
|||
fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n" |
|||
"*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n" |
|||
"*** NOTE: this may include compiler warnings and other important messages\n" |
|||
"*** about your code.\n" |
|||
"*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n"); |
|||
printed_compiler_output_message = true; |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (do_print) |
|||
{ |
|||
char *log = (char *) malloc(log_size); |
|||
CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); |
|||
|
|||
char devname[MAX_NAME_LEN]; |
|||
CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, |
|||
sizeof(devname), devname, NULL)); |
|||
|
|||
CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, |
|||
log_size, log, NULL)); |
|||
fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n", |
|||
knl_name, devname, log); |
|||
} |
|||
} |
|||
|
|||
CHECK_CL_ERROR(status, "clBuildProgram"); |
|||
|
|||
// fish the kernel out of the program |
|||
cl_kernel kernel = clCreateKernel(program, knl_name, &status); |
|||
CHECK_CL_ERROR(status, "clCreateKernel"); |
|||
|
|||
CALL_CL_GUARDED(clReleaseProgram, (program)); |
|||
|
|||
return kernel; |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
void print_device_info(cl_device_id device) |
|||
{ |
|||
// adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c |
|||
|
|||
#define LONG_PROPS \ |
|||
defn(VENDOR_ID), \ |
|||
defn(MAX_COMPUTE_UNITS), \ |
|||
defn(MAX_WORK_ITEM_DIMENSIONS), \ |
|||
defn(MAX_WORK_GROUP_SIZE), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_CHAR), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_SHORT), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_INT), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_LONG), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_FLOAT), \ |
|||
defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \ |
|||
defn(MAX_CLOCK_FREQUENCY), \ |
|||
defn(ADDRESS_BITS), \ |
|||
defn(MAX_MEM_ALLOC_SIZE), \ |
|||
defn(IMAGE_SUPPORT), \ |
|||
defn(MAX_READ_IMAGE_ARGS), \ |
|||
defn(MAX_WRITE_IMAGE_ARGS), \ |
|||
defn(IMAGE2D_MAX_WIDTH), \ |
|||
defn(IMAGE2D_MAX_HEIGHT), \ |
|||
defn(IMAGE3D_MAX_WIDTH), \ |
|||
defn(IMAGE3D_MAX_HEIGHT), \ |
|||
defn(IMAGE3D_MAX_DEPTH), \ |
|||
defn(MAX_SAMPLERS), \ |
|||
defn(MAX_PARAMETER_SIZE), \ |
|||
defn(MEM_BASE_ADDR_ALIGN), \ |
|||
defn(MIN_DATA_TYPE_ALIGN_SIZE), \ |
|||
defn(GLOBAL_MEM_CACHELINE_SIZE), \ |
|||
defn(GLOBAL_MEM_CACHE_SIZE), \ |
|||
defn(GLOBAL_MEM_SIZE), \ |
|||
defn(MAX_CONSTANT_BUFFER_SIZE), \ |
|||
defn(MAX_CONSTANT_ARGS), \ |
|||
defn(LOCAL_MEM_SIZE), \ |
|||
defn(ERROR_CORRECTION_SUPPORT), \ |
|||
defn(PROFILING_TIMER_RESOLUTION), \ |
|||
defn(ENDIAN_LITTLE), \ |
|||
defn(AVAILABLE), \ |
|||
defn(COMPILER_AVAILABLE), |
|||
|
|||
#define STR_PROPS \ |
|||
defn(NAME), \ |
|||
defn(VENDOR), \ |
|||
defn(PROFILE), \ |
|||
defn(VERSION), \ |
|||
defn(EXTENSIONS), |
|||
|
|||
#define HEX_PROPS \ |
|||
defn(SINGLE_FP_CONFIG), \ |
|||
defn(QUEUE_PROPERTIES), |
|||
|
|||
|
|||
printf("---------------------------------------------------------------------\n"); |
|||
|
|||
|
|||
static struct { cl_device_info param; const char *name; } longProps[] = { |
|||
#define defn(X) { CL_DEVICE_##X, #X } |
|||
LONG_PROPS |
|||
#undef defn |
|||
{ 0, NULL }, |
|||
}; |
|||
static struct { cl_device_info param; const char *name; } hexProps[] = { |
|||
#define defn(X) { CL_DEVICE_##X, #X } |
|||
HEX_PROPS |
|||
#undef defn |
|||
{ 0, NULL }, |
|||
}; |
|||
static struct { cl_device_info param; const char *name; } strProps[] = { |
|||
#define defn(X) { CL_DEVICE_##X, #X } |
|||
STR_PROPS |
|||
#undef defn |
|||
{ CL_DRIVER_VERSION, "DRIVER_VERSION" }, |
|||
{ 0, NULL }, |
|||
}; |
|||
cl_int status; |
|||
size_t size; |
|||
char buf[65536]; |
|||
long long val; /* Avoids unpleasant surprises for some params */ |
|||
int ii; |
|||
|
|||
for (ii = 0; strProps[ii].name != NULL; ii++) |
|||
{ |
|||
status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size); |
|||
if (status != CL_SUCCESS) |
|||
{ |
|||
printf("Unable to get %s: %s!\n", |
|||
strProps[ii].name, cl_error_to_str(status)); |
|||
continue; |
|||
} |
|||
if (size > sizeof buf) |
|||
{ |
|||
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
|||
strProps[ii].name, size, sizeof buf); |
|||
} |
|||
printf("%s: %s\n", |
|||
strProps[ii].name, buf); |
|||
} |
|||
printf("\n"); |
|||
|
|||
status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL); |
|||
if (status == CL_SUCCESS) |
|||
{ |
|||
printf("Type: "); |
|||
if (val & CL_DEVICE_TYPE_DEFAULT) |
|||
{ |
|||
val &= ~CL_DEVICE_TYPE_DEFAULT; |
|||
printf("Default "); |
|||
} |
|||
if (val & CL_DEVICE_TYPE_CPU) |
|||
{ |
|||
val &= ~CL_DEVICE_TYPE_CPU; |
|||
printf("CPU "); |
|||
} |
|||
if (val & CL_DEVICE_TYPE_GPU) |
|||
{ |
|||
val &= ~CL_DEVICE_TYPE_GPU; |
|||
printf("GPU "); |
|||
} |
|||
if (val & CL_DEVICE_TYPE_ACCELERATOR) |
|||
{ |
|||
val &= ~CL_DEVICE_TYPE_ACCELERATOR; |
|||
printf("Accelerator "); |
|||
} |
|||
if (val != 0) { |
|||
printf("Unknown (0x%llx) ", val); |
|||
} |
|||
printf("\n"); |
|||
} |
|||
else |
|||
{ |
|||
printf("Unable to get TYPE: %s!\n", |
|||
cl_error_to_str(status)); |
|||
} |
|||
|
|||
status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, |
|||
sizeof val, &val, NULL); |
|||
if (status == CL_SUCCESS) |
|||
{ |
|||
printf("EXECUTION_CAPABILITIES: "); |
|||
if (val & CL_EXEC_KERNEL) |
|||
{ |
|||
val &= ~CL_EXEC_KERNEL; |
|||
printf("Kernel "); |
|||
} |
|||
if (val & CL_EXEC_NATIVE_KERNEL) |
|||
{ |
|||
val &= ~CL_EXEC_NATIVE_KERNEL; |
|||
printf("Native "); |
|||
} |
|||
if (val) |
|||
printf("Unknown (0x%llx) ", val); |
|||
|
|||
printf("\n"); |
|||
} |
|||
else |
|||
{ |
|||
printf("Unable to get EXECUTION_CAPABILITIES: %s!\n", |
|||
cl_error_to_str(status)); |
|||
} |
|||
|
|||
status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, |
|||
sizeof val, &val, NULL); |
|||
if (status == CL_SUCCESS) |
|||
{ |
|||
static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" }; |
|||
static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0]; |
|||
|
|||
printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n", |
|||
val < numTypes ? cacheTypes[val] : "???", val); |
|||
} |
|||
else |
|||
{ |
|||
printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n", |
|||
cl_error_to_str(status)); |
|||
} |
|||
|
|||
status = clGetDeviceInfo(device, |
|||
CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL); |
|||
|
|||
if (status == CL_SUCCESS) |
|||
{ |
|||
static const char *lmemTypes[] = { "???", "Local", "Global" }; |
|||
static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0]; |
|||
|
|||
printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n", |
|||
val < numTypes ? lmemTypes[val] : "???", val); |
|||
} |
|||
else |
|||
{ |
|||
printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n", |
|||
cl_error_to_str(status)); |
|||
} |
|||
|
|||
for (ii = 0; hexProps[ii].name != NULL; ii++) |
|||
{ |
|||
status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size); |
|||
if (status != CL_SUCCESS) |
|||
{ |
|||
printf("Unable to get %s: %s!\n", |
|||
hexProps[ii].name, cl_error_to_str(status)); |
|||
continue; |
|||
} |
|||
if (size > sizeof val) |
|||
{ |
|||
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
|||
hexProps[ii].name, size, sizeof val); |
|||
} |
|||
printf("%s: 0x%llx\n", hexProps[ii].name, val); |
|||
} |
|||
printf("\n"); |
|||
|
|||
for (ii = 0; longProps[ii].name != NULL; ii++) |
|||
{ |
|||
status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size); |
|||
if (status != CL_SUCCESS) |
|||
{ |
|||
printf("Unable to get %s: %s!\n", |
|||
longProps[ii].name, cl_error_to_str(status)); |
|||
continue; |
|||
} |
|||
if (size > sizeof val) |
|||
{ |
|||
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
|||
longProps[ii].name, size, sizeof val); |
|||
} |
|||
printf("%s: %lld\n", longProps[ii].name, val); |
|||
} |
|||
|
|||
{ |
|||
size_t size; |
|||
CALL_CL_GUARDED(clGetDeviceInfo, |
|||
(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size)); |
|||
|
|||
size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay! |
|||
|
|||
CALL_CL_GUARDED(clGetDeviceInfo, |
|||
(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size)); |
|||
|
|||
printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie |
|||
for (size_t i = 0; i < size/sizeof(size_t); ++i) |
|||
printf("%zd ", res_vec[i]); |
|||
printf("\n"); |
|||
} |
|||
printf("---------------------------------------------------------------------\n"); |
|||
} |
|||
|
|||
|
|||
|
|||
void print_device_info_from_queue(cl_command_queue queue) |
|||
{ |
|||
cl_device_id dev; |
|||
CALL_CL_GUARDED(clGetCommandQueueInfo, |
|||
(queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL)); |
|||
|
|||
print_device_info(dev); |
|||
} |
|||
@ -0,0 +1,250 @@ |
|||
/* |
|||
* Copyright (c) 2010, 2012 Andreas Kloeckner |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in |
|||
* all copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|||
* THE SOFTWARE. |
|||
*/ |
|||
|
|||
|
|||
|
|||
|
|||
#ifndef NYUHPC_CL_HELPER |
|||
#define NYUHPC_CL_HELPER |
|||
|
|||
#include <stdarg.h> |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
|
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
/* An error check macro for OpenCL. |
|||
* |
|||
* Usage: |
|||
* CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name") |
|||
* |
|||
* It will abort with a message if an error occurred. |
|||
*/ |
|||
|
|||
#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \ |
|||
if ((STATUS_CODE) != CL_SUCCESS) \ |
|||
{ \ |
|||
fprintf(stderr, \ |
|||
"*** '%s' in '%s' on line %d failed with error '%s'.\n", \ |
|||
WHAT, __FILE__, __LINE__, \ |
|||
cl_error_to_str(STATUS_CODE)); \ |
|||
abort(); \ |
|||
} |
|||
|
|||
/* A more automated error check macro for OpenCL, for use with clXxxx |
|||
* functions that return status codes. (Not all of them do, notably |
|||
* clCreateXxx do not.) |
|||
* |
|||
* Usage: |
|||
* CALL_CL_GUARDED(clFunction, (arg1, arg2)); |
|||
* |
|||
* Note the slightly strange comma between the function name and the |
|||
* argument list. |
|||
*/ |
|||
|
|||
#define CALL_CL_GUARDED(NAME, ARGLIST) \ |
|||
{ \ |
|||
cl_int status_code; \ |
|||
status_code = NAME ARGLIST; \ |
|||
CHECK_CL_ERROR(status_code, #NAME); \ |
|||
} |
|||
|
|||
/* An error check macro for Unix system functions. If "COND" is true, then the |
|||
* last system error ("errno") is printed along with MSG, which is supposed to |
|||
* be a string describing what you were doing. |
|||
* |
|||
* Example: |
|||
* CHECK_SYS_ERROR(dave != 0, "opening hatch"); |
|||
*/ |
|||
#define CHECK_SYS_ERROR(COND, MSG) \ |
|||
if (COND) \ |
|||
{ \ |
|||
perror(MSG); \ |
|||
abort(); \ |
|||
} |
|||
|
|||
/* Return a string describing the OpenCL error code 'e'. |
|||
*/ |
|||
const char *cl_error_to_str(cl_int e); |
|||
|
|||
/* Print a list of available OpenCL platforms and devices |
|||
* to standard output. |
|||
*/ |
|||
void print_platforms_devices(); |
|||
|
|||
/* Create an OpenCL context and a matching command queue on a platform from a |
|||
* vendor whose name contains 'plat_name' on a device whose name contains |
|||
* 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no |
|||
* preference in the matter. |
|||
* |
|||
* If multiple devices match both 'plat_name' and 'dev_name', then 'idx' |
|||
* prescribes the number of the device that should be chosen. |
|||
* |
|||
* You may also use the special value CHOOSE_INTERACTIVELY to offer the user |
|||
* a choice. You should use this value for code you turn in. |
|||
* |
|||
* This function always succeeds. (If an error occurs, the program |
|||
* is aborted. |
|||
* |
|||
* You can force interactive querying by defining the |
|||
* CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c. |
|||
* You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE |
|||
* compiler option. |
|||
*/ |
|||
extern const char *CHOOSE_INTERACTIVELY; |
|||
void create_context_on(const char *plat_name, const char*dev_name, cl_uint |
|||
idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling); |
|||
|
|||
/* Read contents of file 'filename'. |
|||
* Return as a new string. You must free the string when you're done with it. |
|||
* |
|||
* This function always succeeds. (If an error occurs, the program |
|||
* is aborted. |
|||
*/ |
|||
char *read_file(const char *filename); |
|||
|
|||
/* Create a new OpenCL kernel from the code in the string 'knl'. |
|||
* 'knl_name' is the name of the kernel function, and 'options', |
|||
* if not NULL, is a string containing compiler flags. |
|||
* |
|||
* You must release the resulting kernel when you're done |
|||
* with it. |
|||
* |
|||
* This function always succeeds. (If an error occurs, the program |
|||
* is aborted. |
|||
*/ |
|||
cl_kernel kernel_from_string(cl_context ctx, |
|||
char const *knl, char const *knl_name, char const *options); |
|||
|
|||
/* Print information about a device, found from either the |
|||
* queue or the device_id. |
|||
*/ |
|||
void print_device_info(cl_device_id device); |
|||
void print_device_info_from_queue(cl_command_queue queue); |
|||
|
|||
#define SET_1_KERNEL_ARG(knl, arg0) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); |
|||
|
|||
#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); |
|||
|
|||
#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); |
|||
|
|||
#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); |
|||
|
|||
#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); |
|||
|
|||
#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); |
|||
|
|||
#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); |
|||
|
|||
#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); |
|||
|
|||
#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); |
|||
|
|||
#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); |
|||
|
|||
#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); |
|||
|
|||
#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); \ |
|||
CALL_CL_GUARDED(clSetKernelArg, (knl, 11, sizeof(arg11), &arg11)); |
|||
|
|||
#endif |
|||
@ -0,0 +1,7 @@ |
|||
#include "cl-helper.h" |
|||
|
|||
int main(int argc, char **argv) |
|||
{ |
|||
print_platforms_devices(); |
|||
return 0; |
|||
} |
|||
@ -0,0 +1,7 @@ |
|||
#! /bin/bash |
|||
|
|||
NCPUS=$(grep processor /proc/cpuinfo | wc -l) |
|||
for i in $(seq 0 $((NCPUS-1)) ); do |
|||
echo $i |
|||
cpufreq-set -g $1 -c $i |
|||
done |
|||
@ -0,0 +1,2 @@ |
|||
#! /bin/sh |
|||
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq |
|||
@ -0,0 +1,54 @@ |
|||
#ifdef __APPLE__ |
|||
|
|||
#include <sys/time.h> |
|||
|
|||
typedef struct timeval timestamp_type; |
|||
|
|||
static void get_timestamp(timestamp_type *t) |
|||
{ |
|||
gettimeofday(t, NULL); |
|||
} |
|||
|
|||
static double timestamp_diff_in_seconds(timestamp_type start, |
|||
timestamp_type end) |
|||
{ |
|||
/* Perform the carry for the later subtraction by updating start. */ |
|||
if (end.tv_usec < start.tv_usec) { |
|||
int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1; |
|||
start.tv_usec -= 1000000 * nsec; |
|||
start.tv_sec += nsec; |
|||
} |
|||
if (end.tv_usec - start.tv_usec > 1000000) { |
|||
int nsec = (end.tv_usec - start.tv_usec) / 1000000; |
|||
start.tv_usec += 1000000 * nsec; |
|||
start.tv_sec -= nsec; |
|||
} |
|||
|
|||
return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6; |
|||
} |
|||
|
|||
#else |
|||
|
|||
#include <time.h> |
|||
|
|||
typedef struct timespec timestamp_type; |
|||
|
|||
static void get_timestamp(timestamp_type *t) |
|||
{ |
|||
clock_gettime(CLOCK_REALTIME, t); |
|||
} |
|||
|
|||
static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end) |
|||
{ |
|||
struct timespec temp; |
|||
if ((end.tv_nsec-start.tv_nsec)<0) { |
|||
temp.tv_sec = end.tv_sec-start.tv_sec-1; |
|||
temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; |
|||
} else { |
|||
temp.tv_sec = end.tv_sec-start.tv_sec; |
|||
temp.tv_nsec = end.tv_nsec-start.tv_nsec; |
|||
} |
|||
return temp.tv_sec + 1e-9*temp.tv_nsec; |
|||
} |
|||
|
|||
#endif |
|||
@ -0,0 +1,12 @@ |
|||
#pragma OPENCL EXTENSION cl_khr_fp64: enable |
|||
|
|||
__kernel void sum( |
|||
__global const float *a, |
|||
__global const float *b, |
|||
__global float *c, |
|||
long n) |
|||
{ |
|||
int gid = get_global_id(0); |
|||
if (gid < n) |
|||
c[gid] = a[gid] + b[gid]; |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue