commit
c3bddbc632
11 changed files with 1261 additions and 0 deletions
-
6.gitignore
-
12Makefile
-
39README.md
-
131cl-demo.c
-
741cl-helper.c
-
250cl-helper.h
-
7print-devices.c
-
7set-governor
-
2show-clock-freq
-
54timing.h
-
12vec-add-soln.cl
@ -0,0 +1,6 @@ |
|||||
|
.*.sw[op] |
||||
|
*~ |
||||
|
a.out |
||||
|
print-devices |
||||
|
cl-demo |
||||
|
*.o |
||||
@ -0,0 +1,12 @@ |
|||||
|
EXECUTABLES = cl-demo print-devices |
||||
|
|
||||
|
all: $(EXECUTABLES) |
||||
|
|
||||
|
print-devices: print-devices.c cl-helper.c |
||||
|
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL |
||||
|
|
||||
|
cl-demo: cl-demo.c cl-helper.c |
||||
|
gcc -std=gnu99 -o$@ $^ -lrt -lOpenCL |
||||
|
|
||||
|
clean: |
||||
|
@rm -f $(EXECUTABLES) *.o |
||||
@ -0,0 +1,39 @@ |
|||||
|
# OpenCL Howto |
||||
|
|
||||
|
Code snippets taken from |
||||
|
[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo) |
||||
|
|
||||
|
## Description |
||||
|
|
||||
|
This is just some more playing around with OpenCL and try to learn a bit about |
||||
|
it. |
||||
|
|
||||
|
## Requirements |
||||
|
|
||||
|
Some OpenCL capable hardware and the according OpenCL library exposing the |
||||
|
OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT |
||||
|
Integrated Graphics Controller (rev 09)) with the |
||||
|
[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) |
||||
|
open source library. |
||||
|
|
||||
|
## License |
||||
|
|
||||
|
MIT License |
||||
|
|
||||
|
> Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
> of this software and associated documentation files (the "Software"), to |
||||
|
> deal in the Software without restriction, including without limitation the |
||||
|
> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
||||
|
> sell copies of the Software, and to permit persons to whom the Software is |
||||
|
> furnished to do so, subject to the following conditions: |
||||
|
> |
||||
|
> The above copyright notice and this permission notice shall be included in |
||||
|
> all copies or substantial portions of the Software. |
||||
|
> |
||||
|
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||||
|
> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||||
|
> IN THE SOFTWARE. |
||||
@ -0,0 +1,131 @@ |
|||||
|
#include "timing.h" |
||||
|
#include "cl-helper.h" |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
int main(int argc, char **argv) |
||||
|
{ |
||||
|
if (argc != 3) |
||||
|
{ |
||||
|
fprintf(stderr, "need two arguments!\n"); |
||||
|
abort(); |
||||
|
} |
||||
|
|
||||
|
const cl_long n = atol(argv[1]); |
||||
|
const int ntrips = atoi(argv[2]); |
||||
|
|
||||
|
cl_context ctx; |
||||
|
cl_command_queue queue; |
||||
|
create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); |
||||
|
|
||||
|
print_device_info_from_queue(queue); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// load kernels |
||||
|
// -------------------------------------------------------------------------- |
||||
|
char *knl_text = read_file("vec-add-soln.cl"); |
||||
|
cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); |
||||
|
free(knl_text); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// allocate and initialize CPU memory |
||||
|
// -------------------------------------------------------------------------- |
||||
|
float *a = (float *) malloc(sizeof(float) * n); |
||||
|
if (!a) { perror("alloc x"); abort(); } |
||||
|
float *b = (float *) malloc(sizeof(float) * n); |
||||
|
if (!b) { perror("alloc y"); abort(); } |
||||
|
float *c = (float *) malloc(sizeof(float) * n); |
||||
|
if (!c) { perror("alloc z"); abort(); } |
||||
|
|
||||
|
for (size_t i = 0; i < n; ++i) |
||||
|
{ |
||||
|
a[i] = i; |
||||
|
b[i] = 2*i; |
||||
|
} |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// allocate device memory |
||||
|
// -------------------------------------------------------------------------- |
||||
|
cl_int status; |
||||
|
cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
||||
|
sizeof(float) * n, 0, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateBuffer"); |
||||
|
|
||||
|
cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
||||
|
sizeof(float) * n, 0, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateBuffer"); |
||||
|
|
||||
|
cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, |
||||
|
sizeof(float) * n, 0, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateBuffer"); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// transfer to device |
||||
|
// -------------------------------------------------------------------------- |
||||
|
CALL_CL_GUARDED(clEnqueueWriteBuffer, ( |
||||
|
queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, |
||||
|
n * sizeof(float), a, |
||||
|
0, NULL, NULL)); |
||||
|
|
||||
|
CALL_CL_GUARDED(clEnqueueWriteBuffer, ( |
||||
|
queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, |
||||
|
n * sizeof(float), b, |
||||
|
0, NULL, NULL)); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// run code on device |
||||
|
// -------------------------------------------------------------------------- |
||||
|
|
||||
|
CALL_CL_GUARDED(clFinish, (queue)); |
||||
|
|
||||
|
timestamp_type time1, time2; |
||||
|
get_timestamp(&time1); |
||||
|
|
||||
|
for (int trip = 0; trip < ntrips; ++trip) |
||||
|
{ |
||||
|
SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n); |
||||
|
size_t ldim[] = { 32 }; |
||||
|
size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] }; |
||||
|
CALL_CL_GUARDED(clEnqueueNDRangeKernel, |
||||
|
(queue, knl, |
||||
|
/*dimensions*/ 1, NULL, gdim, ldim, |
||||
|
0, NULL, NULL)); |
||||
|
} |
||||
|
|
||||
|
CALL_CL_GUARDED(clFinish, (queue)); |
||||
|
|
||||
|
get_timestamp(&time2); |
||||
|
double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips; |
||||
|
printf("%f s\n", elapsed); |
||||
|
printf("%f GB/s\n", |
||||
|
3*n*sizeof(float)/1e9/elapsed); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// transfer back & check |
||||
|
// -------------------------------------------------------------------------- |
||||
|
CALL_CL_GUARDED(clEnqueueReadBuffer, ( |
||||
|
queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, |
||||
|
n * sizeof(float), c, |
||||
|
0, NULL, NULL)); |
||||
|
|
||||
|
for (size_t i = 0; i < n; ++i) |
||||
|
if (c[i] != 3*i) |
||||
|
{ |
||||
|
printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i); |
||||
|
abort(); |
||||
|
} |
||||
|
puts("GOOD"); |
||||
|
|
||||
|
// -------------------------------------------------------------------------- |
||||
|
// clean up |
||||
|
// -------------------------------------------------------------------------- |
||||
|
CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); |
||||
|
CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); |
||||
|
CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); |
||||
|
CALL_CL_GUARDED(clReleaseKernel, (knl)); |
||||
|
CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); |
||||
|
CALL_CL_GUARDED(clReleaseContext, (ctx)); |
||||
|
|
||||
|
return 0; |
||||
|
} |
||||
@ -0,0 +1,741 @@ |
|||||
|
/* |
||||
|
* Copyright (c) 2010 Andreas Kloeckner |
||||
|
* |
||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
* of this software and associated documentation files (the "Software"), to deal |
||||
|
* in the Software without restriction, including without limitation the rights |
||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
* copies of the Software, and to permit persons to whom the Software is |
||||
|
* furnished to do so, subject to the following conditions: |
||||
|
* |
||||
|
* The above copyright notice and this permission notice shall be included in |
||||
|
* all copies or substantial portions of the Software. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
* THE SOFTWARE. |
||||
|
*/ |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
#include "cl-helper.h" |
||||
|
#include <string.h> |
||||
|
#include <stdbool.h> |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
#define MAX_NAME_LEN 1000 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
const char *cl_error_to_str(cl_int e) |
||||
|
{ |
||||
|
switch (e) |
||||
|
{ |
||||
|
case CL_SUCCESS: return "success"; |
||||
|
case CL_DEVICE_NOT_FOUND: return "device not found"; |
||||
|
case CL_DEVICE_NOT_AVAILABLE: return "device not available"; |
||||
|
#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) |
||||
|
case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available"; |
||||
|
#endif |
||||
|
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure"; |
||||
|
case CL_OUT_OF_RESOURCES: return "out of resources"; |
||||
|
case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; |
||||
|
case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available"; |
||||
|
case CL_MEM_COPY_OVERLAP: return "mem copy overlap"; |
||||
|
case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch"; |
||||
|
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported"; |
||||
|
case CL_BUILD_PROGRAM_FAILURE: return "build program failure"; |
||||
|
case CL_MAP_FAILURE: return "map failure"; |
||||
|
|
||||
|
case CL_INVALID_VALUE: return "invalid value"; |
||||
|
case CL_INVALID_DEVICE_TYPE: return "invalid device type"; |
||||
|
case CL_INVALID_PLATFORM: return "invalid platform"; |
||||
|
case CL_INVALID_DEVICE: return "invalid device"; |
||||
|
case CL_INVALID_CONTEXT: return "invalid context"; |
||||
|
case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties"; |
||||
|
case CL_INVALID_COMMAND_QUEUE: return "invalid command queue"; |
||||
|
case CL_INVALID_HOST_PTR: return "invalid host ptr"; |
||||
|
case CL_INVALID_MEM_OBJECT: return "invalid mem object"; |
||||
|
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor"; |
||||
|
case CL_INVALID_IMAGE_SIZE: return "invalid image size"; |
||||
|
case CL_INVALID_SAMPLER: return "invalid sampler"; |
||||
|
case CL_INVALID_BINARY: return "invalid binary"; |
||||
|
case CL_INVALID_BUILD_OPTIONS: return "invalid build options"; |
||||
|
case CL_INVALID_PROGRAM: return "invalid program"; |
||||
|
case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable"; |
||||
|
case CL_INVALID_KERNEL_NAME: return "invalid kernel name"; |
||||
|
case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition"; |
||||
|
case CL_INVALID_KERNEL: return "invalid kernel"; |
||||
|
case CL_INVALID_ARG_INDEX: return "invalid arg index"; |
||||
|
case CL_INVALID_ARG_VALUE: return "invalid arg value"; |
||||
|
case CL_INVALID_ARG_SIZE: return "invalid arg size"; |
||||
|
case CL_INVALID_KERNEL_ARGS: return "invalid kernel args"; |
||||
|
case CL_INVALID_WORK_DIMENSION: return "invalid work dimension"; |
||||
|
case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; |
||||
|
case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size"; |
||||
|
case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset"; |
||||
|
case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list"; |
||||
|
case CL_INVALID_EVENT: return "invalid event"; |
||||
|
case CL_INVALID_OPERATION: return "invalid operation"; |
||||
|
case CL_INVALID_GL_OBJECT: return "invalid gl object"; |
||||
|
case CL_INVALID_BUFFER_SIZE: return "invalid buffer size"; |
||||
|
case CL_INVALID_MIP_LEVEL: return "invalid mip level"; |
||||
|
|
||||
|
#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) |
||||
|
case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number"; |
||||
|
#endif |
||||
|
|
||||
|
#ifdef CL_VERSION_1_1 |
||||
|
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset"; |
||||
|
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list"; |
||||
|
case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size"; |
||||
|
#endif |
||||
|
|
||||
|
default: return "invalid/unknown error code"; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
void print_platforms_devices() |
||||
|
{ |
||||
|
// get number of platforms |
||||
|
cl_uint plat_count; |
||||
|
CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); |
||||
|
|
||||
|
// allocate memory, get list of platforms |
||||
|
cl_platform_id *platforms = |
||||
|
(cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); |
||||
|
CHECK_SYS_ERROR(!platforms, "allocating platform array"); |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); |
||||
|
|
||||
|
// iterate over platforms |
||||
|
for (cl_uint i = 0; i < plat_count; ++i) |
||||
|
{ |
||||
|
// get platform vendor name |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
printf("platform %d: vendor '%s'\n", i, buf); |
||||
|
|
||||
|
// get number of devices in platform |
||||
|
cl_uint dev_count; |
||||
|
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
||||
|
0, NULL, &dev_count)); |
||||
|
|
||||
|
cl_device_id *devices = |
||||
|
(cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); |
||||
|
CHECK_SYS_ERROR(!devices, "allocating device array"); |
||||
|
|
||||
|
// get list of devices in platform |
||||
|
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
||||
|
dev_count, devices, NULL)); |
||||
|
|
||||
|
// iterate over devices |
||||
|
for (cl_uint j = 0; j < dev_count; ++j) |
||||
|
{ |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
printf(" device %d: '%s'\n", j, buf); |
||||
|
} |
||||
|
|
||||
|
free(devices); |
||||
|
} |
||||
|
|
||||
|
free(platforms); |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
/* Read a line from stdin. C makes things simple. :) |
||||
|
* From http://stackoverflow.com/a/314422/1148634 |
||||
|
*/ |
||||
|
char *read_a_line(void) |
||||
|
{ |
||||
|
char * line = (char *) malloc(MAX_NAME_LEN), * linep = line; |
||||
|
size_t lenmax = MAX_NAME_LEN, len = lenmax; |
||||
|
int c; |
||||
|
|
||||
|
if(line == NULL) |
||||
|
return NULL; |
||||
|
|
||||
|
for(;;) |
||||
|
{ |
||||
|
c = fgetc(stdin); |
||||
|
if(c == EOF) |
||||
|
break; |
||||
|
|
||||
|
if(--len == 0) |
||||
|
{ |
||||
|
char *linen = (char *) realloc(linep, lenmax *= 2); |
||||
|
len = lenmax; |
||||
|
|
||||
|
if(linen == NULL) |
||||
|
{ |
||||
|
free(linep); |
||||
|
return NULL; |
||||
|
} |
||||
|
line = linen + (line - linep); |
||||
|
linep = linen; |
||||
|
} |
||||
|
|
||||
|
if((*line++ = c) == '\n') |
||||
|
break; |
||||
|
} |
||||
|
*line = '\0'; |
||||
|
return linep; |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
const char *CHOOSE_INTERACTIVELY = "INTERACTIVE"; |
||||
|
|
||||
|
|
||||
|
#define MIN(a,b) (((a)<(b))?(a):(b)) |
||||
|
#define MAX(a,b) (((a)>(b))?(a):(b)) |
||||
|
|
||||
|
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, |
||||
|
cl_context *ctx, cl_command_queue *queue, int enable_profiling) |
||||
|
{ |
||||
|
char dev_sel_buf[MAX_NAME_LEN]; |
||||
|
char platform_sel_buf[MAX_NAME_LEN]; |
||||
|
|
||||
|
// get number of platforms |
||||
|
cl_uint plat_count; |
||||
|
CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); |
||||
|
|
||||
|
// allocate memory, get list of platform handles |
||||
|
cl_platform_id *platforms = |
||||
|
(cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); |
||||
|
CHECK_SYS_ERROR(!platforms, "allocating platform array"); |
||||
|
CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); |
||||
|
|
||||
|
// print menu, if requested |
||||
|
#ifndef CL_HELPER_FORCE_INTERACTIVE |
||||
|
if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer |
||||
|
#endif |
||||
|
{ |
||||
|
puts("Choose platform:"); |
||||
|
for (cl_uint i = 0; i < plat_count; ++i) |
||||
|
{ |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
printf("[%d] %s\n", i, buf); |
||||
|
} |
||||
|
|
||||
|
printf("Enter choice: "); |
||||
|
fflush(stdout); |
||||
|
|
||||
|
char *sel = read_a_line(); |
||||
|
if (!sel) |
||||
|
{ |
||||
|
fprintf(stderr, "error reading line from stdin"); |
||||
|
abort(); |
||||
|
} |
||||
|
|
||||
|
int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); |
||||
|
free(sel); |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, |
||||
|
sizeof(platform_sel_buf), platform_sel_buf, NULL)); |
||||
|
plat_name = platform_sel_buf; |
||||
|
} |
||||
|
|
||||
|
// iterate over platforms |
||||
|
for (cl_uint i = 0; i < plat_count; ++i) |
||||
|
{ |
||||
|
// get platform name |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
|
||||
|
// does it match? |
||||
|
if (!plat_name || strstr(buf, plat_name)) |
||||
|
{ |
||||
|
// get number of devices in platform |
||||
|
cl_uint dev_count; |
||||
|
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
||||
|
0, NULL, &dev_count)); |
||||
|
|
||||
|
// allocate memory, get list of device handles in platform |
||||
|
cl_device_id *devices = |
||||
|
(cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); |
||||
|
CHECK_SYS_ERROR(!devices, "allocating device array"); |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, |
||||
|
dev_count, devices, NULL)); |
||||
|
|
||||
|
// {{{ print device menu, if requested |
||||
|
#ifndef CL_HELPER_FORCE_INTERACTIVE |
||||
|
if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer |
||||
|
#endif |
||||
|
{ |
||||
|
puts("Choose device:"); |
||||
|
for (cl_uint j = 0; j < dev_count; ++j) |
||||
|
{ |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
printf("[%d] %s\n", j, buf); |
||||
|
} |
||||
|
|
||||
|
printf("Enter choice: "); |
||||
|
fflush(stdout); |
||||
|
|
||||
|
char *sel = read_a_line(); |
||||
|
if (!sel) |
||||
|
{ |
||||
|
fprintf(stderr, "error reading line from stdin"); |
||||
|
abort(); |
||||
|
} |
||||
|
|
||||
|
int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); |
||||
|
free(sel); |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, |
||||
|
sizeof(dev_sel_buf), dev_sel_buf, NULL)); |
||||
|
dev_name = dev_sel_buf; |
||||
|
} |
||||
|
|
||||
|
// }}} |
||||
|
|
||||
|
// iterate over devices |
||||
|
for (cl_uint j = 0; j < dev_count; ++j) |
||||
|
{ |
||||
|
// get device name |
||||
|
char buf[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, |
||||
|
sizeof(buf), buf, NULL)); |
||||
|
|
||||
|
// does it match? |
||||
|
if (!dev_name || strstr(buf, dev_name)) |
||||
|
{ |
||||
|
if (idx == 0) |
||||
|
{ |
||||
|
cl_platform_id plat = platforms[i]; |
||||
|
cl_device_id dev = devices[j]; |
||||
|
|
||||
|
free(devices); |
||||
|
free(platforms); |
||||
|
|
||||
|
// create a context |
||||
|
cl_context_properties cps[3] = { |
||||
|
CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; |
||||
|
|
||||
|
cl_int status; |
||||
|
*ctx = clCreateContext( |
||||
|
cps, 1, &dev, NULL, NULL, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateContext"); |
||||
|
|
||||
|
// create a command queue |
||||
|
cl_command_queue_properties qprops = 0; |
||||
|
if (enable_profiling) |
||||
|
qprops |= CL_QUEUE_PROFILING_ENABLE; |
||||
|
|
||||
|
if (queue) |
||||
|
{ |
||||
|
*queue = clCreateCommandQueue(*ctx, dev, qprops, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateCommandQueue"); |
||||
|
} |
||||
|
|
||||
|
return; |
||||
|
} |
||||
|
else |
||||
|
--idx; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
free(devices); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
free(platforms); |
||||
|
|
||||
|
fputs("create_context_on: specified device not found.\n", stderr); |
||||
|
abort(); |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
char *read_file(const char *filename) |
||||
|
{ |
||||
|
FILE *f = fopen(filename, "r"); |
||||
|
CHECK_SYS_ERROR(!f, "read_file: opening file"); |
||||
|
|
||||
|
// figure out file size |
||||
|
CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end"); |
||||
|
size_t size = ftell(f); |
||||
|
|
||||
|
CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0, |
||||
|
"read_file: seeking to start"); |
||||
|
|
||||
|
// allocate memory, slurp in entire file |
||||
|
char *result = (char *) malloc(size+1); |
||||
|
CHECK_SYS_ERROR(!result, "read_file: allocating file contents"); |
||||
|
CHECK_SYS_ERROR(fread(result, 1, size, f) < size, |
||||
|
"read_file: reading file contents"); |
||||
|
|
||||
|
// close, return |
||||
|
CHECK_SYS_ERROR(fclose(f), "read_file: closing file"); |
||||
|
result[size] = '\0'; |
||||
|
|
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
static int printed_compiler_output_message = 0; |
||||
|
|
||||
|
cl_kernel kernel_from_string(cl_context ctx, |
||||
|
char const *knl, char const *knl_name, char const *options) |
||||
|
{ |
||||
|
// create an OpenCL program (may have multiple kernels) |
||||
|
size_t sizes[] = { strlen(knl) }; |
||||
|
|
||||
|
if (options && strlen(options) == 0) |
||||
|
{ |
||||
|
// reportedly, some implementations dislike empty strings. |
||||
|
options = NULL; |
||||
|
} |
||||
|
|
||||
|
cl_int status; |
||||
|
cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateProgramWithSource"); |
||||
|
|
||||
|
// build it |
||||
|
status = clBuildProgram(program, 0, NULL, options, NULL, NULL); |
||||
|
|
||||
|
{ |
||||
|
// get build log and print it |
||||
|
|
||||
|
cl_device_id dev; |
||||
|
CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, |
||||
|
sizeof(dev), &dev, NULL)); |
||||
|
|
||||
|
size_t log_size; |
||||
|
CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, |
||||
|
0, NULL, &log_size)); |
||||
|
|
||||
|
bool do_print = status != CL_SUCCESS; |
||||
|
if (!do_print && log_size) |
||||
|
{ |
||||
|
if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT")) |
||||
|
do_print = true; |
||||
|
else |
||||
|
{ |
||||
|
if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG")) |
||||
|
{ |
||||
|
fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n" |
||||
|
"*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n" |
||||
|
"*** NOTE: this may include compiler warnings and other important messages\n" |
||||
|
"*** about your code.\n" |
||||
|
"*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n"); |
||||
|
printed_compiler_output_message = true; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (do_print) |
||||
|
{ |
||||
|
char *log = (char *) malloc(log_size); |
||||
|
CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); |
||||
|
|
||||
|
char devname[MAX_NAME_LEN]; |
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, |
||||
|
sizeof(devname), devname, NULL)); |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, |
||||
|
log_size, log, NULL)); |
||||
|
fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n", |
||||
|
knl_name, devname, log); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
CHECK_CL_ERROR(status, "clBuildProgram"); |
||||
|
|
||||
|
// fish the kernel out of the program |
||||
|
cl_kernel kernel = clCreateKernel(program, knl_name, &status); |
||||
|
CHECK_CL_ERROR(status, "clCreateKernel"); |
||||
|
|
||||
|
CALL_CL_GUARDED(clReleaseProgram, (program)); |
||||
|
|
||||
|
return kernel; |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
void print_device_info(cl_device_id device) |
||||
|
{ |
||||
|
// adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c |
||||
|
|
||||
|
#define LONG_PROPS \ |
||||
|
defn(VENDOR_ID), \ |
||||
|
defn(MAX_COMPUTE_UNITS), \ |
||||
|
defn(MAX_WORK_ITEM_DIMENSIONS), \ |
||||
|
defn(MAX_WORK_GROUP_SIZE), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_CHAR), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_SHORT), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_INT), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_LONG), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_FLOAT), \ |
||||
|
defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \ |
||||
|
defn(MAX_CLOCK_FREQUENCY), \ |
||||
|
defn(ADDRESS_BITS), \ |
||||
|
defn(MAX_MEM_ALLOC_SIZE), \ |
||||
|
defn(IMAGE_SUPPORT), \ |
||||
|
defn(MAX_READ_IMAGE_ARGS), \ |
||||
|
defn(MAX_WRITE_IMAGE_ARGS), \ |
||||
|
defn(IMAGE2D_MAX_WIDTH), \ |
||||
|
defn(IMAGE2D_MAX_HEIGHT), \ |
||||
|
defn(IMAGE3D_MAX_WIDTH), \ |
||||
|
defn(IMAGE3D_MAX_HEIGHT), \ |
||||
|
defn(IMAGE3D_MAX_DEPTH), \ |
||||
|
defn(MAX_SAMPLERS), \ |
||||
|
defn(MAX_PARAMETER_SIZE), \ |
||||
|
defn(MEM_BASE_ADDR_ALIGN), \ |
||||
|
defn(MIN_DATA_TYPE_ALIGN_SIZE), \ |
||||
|
defn(GLOBAL_MEM_CACHELINE_SIZE), \ |
||||
|
defn(GLOBAL_MEM_CACHE_SIZE), \ |
||||
|
defn(GLOBAL_MEM_SIZE), \ |
||||
|
defn(MAX_CONSTANT_BUFFER_SIZE), \ |
||||
|
defn(MAX_CONSTANT_ARGS), \ |
||||
|
defn(LOCAL_MEM_SIZE), \ |
||||
|
defn(ERROR_CORRECTION_SUPPORT), \ |
||||
|
defn(PROFILING_TIMER_RESOLUTION), \ |
||||
|
defn(ENDIAN_LITTLE), \ |
||||
|
defn(AVAILABLE), \ |
||||
|
defn(COMPILER_AVAILABLE), |
||||
|
|
||||
|
#define STR_PROPS \ |
||||
|
defn(NAME), \ |
||||
|
defn(VENDOR), \ |
||||
|
defn(PROFILE), \ |
||||
|
defn(VERSION), \ |
||||
|
defn(EXTENSIONS), |
||||
|
|
||||
|
#define HEX_PROPS \ |
||||
|
defn(SINGLE_FP_CONFIG), \ |
||||
|
defn(QUEUE_PROPERTIES), |
||||
|
|
||||
|
|
||||
|
printf("---------------------------------------------------------------------\n"); |
||||
|
|
||||
|
|
||||
|
static struct { cl_device_info param; const char *name; } longProps[] = { |
||||
|
#define defn(X) { CL_DEVICE_##X, #X } |
||||
|
LONG_PROPS |
||||
|
#undef defn |
||||
|
{ 0, NULL }, |
||||
|
}; |
||||
|
static struct { cl_device_info param; const char *name; } hexProps[] = { |
||||
|
#define defn(X) { CL_DEVICE_##X, #X } |
||||
|
HEX_PROPS |
||||
|
#undef defn |
||||
|
{ 0, NULL }, |
||||
|
}; |
||||
|
static struct { cl_device_info param; const char *name; } strProps[] = { |
||||
|
#define defn(X) { CL_DEVICE_##X, #X } |
||||
|
STR_PROPS |
||||
|
#undef defn |
||||
|
{ CL_DRIVER_VERSION, "DRIVER_VERSION" }, |
||||
|
{ 0, NULL }, |
||||
|
}; |
||||
|
cl_int status; |
||||
|
size_t size; |
||||
|
char buf[65536]; |
||||
|
long long val; /* Avoids unpleasant surprises for some params */ |
||||
|
int ii; |
||||
|
|
||||
|
for (ii = 0; strProps[ii].name != NULL; ii++) |
||||
|
{ |
||||
|
status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size); |
||||
|
if (status != CL_SUCCESS) |
||||
|
{ |
||||
|
printf("Unable to get %s: %s!\n", |
||||
|
strProps[ii].name, cl_error_to_str(status)); |
||||
|
continue; |
||||
|
} |
||||
|
if (size > sizeof buf) |
||||
|
{ |
||||
|
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
||||
|
strProps[ii].name, size, sizeof buf); |
||||
|
} |
||||
|
printf("%s: %s\n", |
||||
|
strProps[ii].name, buf); |
||||
|
} |
||||
|
printf("\n"); |
||||
|
|
||||
|
status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL); |
||||
|
if (status == CL_SUCCESS) |
||||
|
{ |
||||
|
printf("Type: "); |
||||
|
if (val & CL_DEVICE_TYPE_DEFAULT) |
||||
|
{ |
||||
|
val &= ~CL_DEVICE_TYPE_DEFAULT; |
||||
|
printf("Default "); |
||||
|
} |
||||
|
if (val & CL_DEVICE_TYPE_CPU) |
||||
|
{ |
||||
|
val &= ~CL_DEVICE_TYPE_CPU; |
||||
|
printf("CPU "); |
||||
|
} |
||||
|
if (val & CL_DEVICE_TYPE_GPU) |
||||
|
{ |
||||
|
val &= ~CL_DEVICE_TYPE_GPU; |
||||
|
printf("GPU "); |
||||
|
} |
||||
|
if (val & CL_DEVICE_TYPE_ACCELERATOR) |
||||
|
{ |
||||
|
val &= ~CL_DEVICE_TYPE_ACCELERATOR; |
||||
|
printf("Accelerator "); |
||||
|
} |
||||
|
if (val != 0) { |
||||
|
printf("Unknown (0x%llx) ", val); |
||||
|
} |
||||
|
printf("\n"); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
printf("Unable to get TYPE: %s!\n", |
||||
|
cl_error_to_str(status)); |
||||
|
} |
||||
|
|
||||
|
status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, |
||||
|
sizeof val, &val, NULL); |
||||
|
if (status == CL_SUCCESS) |
||||
|
{ |
||||
|
printf("EXECUTION_CAPABILITIES: "); |
||||
|
if (val & CL_EXEC_KERNEL) |
||||
|
{ |
||||
|
val &= ~CL_EXEC_KERNEL; |
||||
|
printf("Kernel "); |
||||
|
} |
||||
|
if (val & CL_EXEC_NATIVE_KERNEL) |
||||
|
{ |
||||
|
val &= ~CL_EXEC_NATIVE_KERNEL; |
||||
|
printf("Native "); |
||||
|
} |
||||
|
if (val) |
||||
|
printf("Unknown (0x%llx) ", val); |
||||
|
|
||||
|
printf("\n"); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
printf("Unable to get EXECUTION_CAPABILITIES: %s!\n", |
||||
|
cl_error_to_str(status)); |
||||
|
} |
||||
|
|
||||
|
status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, |
||||
|
sizeof val, &val, NULL); |
||||
|
if (status == CL_SUCCESS) |
||||
|
{ |
||||
|
static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" }; |
||||
|
static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0]; |
||||
|
|
||||
|
printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n", |
||||
|
val < numTypes ? cacheTypes[val] : "???", val); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n", |
||||
|
cl_error_to_str(status)); |
||||
|
} |
||||
|
|
||||
|
status = clGetDeviceInfo(device, |
||||
|
CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL); |
||||
|
|
||||
|
if (status == CL_SUCCESS) |
||||
|
{ |
||||
|
static const char *lmemTypes[] = { "???", "Local", "Global" }; |
||||
|
static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0]; |
||||
|
|
||||
|
printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n", |
||||
|
val < numTypes ? lmemTypes[val] : "???", val); |
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n", |
||||
|
cl_error_to_str(status)); |
||||
|
} |
||||
|
|
||||
|
for (ii = 0; hexProps[ii].name != NULL; ii++) |
||||
|
{ |
||||
|
status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size); |
||||
|
if (status != CL_SUCCESS) |
||||
|
{ |
||||
|
printf("Unable to get %s: %s!\n", |
||||
|
hexProps[ii].name, cl_error_to_str(status)); |
||||
|
continue; |
||||
|
} |
||||
|
if (size > sizeof val) |
||||
|
{ |
||||
|
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
||||
|
hexProps[ii].name, size, sizeof val); |
||||
|
} |
||||
|
printf("%s: 0x%llx\n", hexProps[ii].name, val); |
||||
|
} |
||||
|
printf("\n"); |
||||
|
|
||||
|
for (ii = 0; longProps[ii].name != NULL; ii++) |
||||
|
{ |
||||
|
status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size); |
||||
|
if (status != CL_SUCCESS) |
||||
|
{ |
||||
|
printf("Unable to get %s: %s!\n", |
||||
|
longProps[ii].name, cl_error_to_str(status)); |
||||
|
continue; |
||||
|
} |
||||
|
if (size > sizeof val) |
||||
|
{ |
||||
|
printf("Large %s (%zd bytes)! Truncating to %ld!\n", |
||||
|
longProps[ii].name, size, sizeof val); |
||||
|
} |
||||
|
printf("%s: %lld\n", longProps[ii].name, val); |
||||
|
} |
||||
|
|
||||
|
{ |
||||
|
size_t size; |
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, |
||||
|
(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size)); |
||||
|
|
||||
|
size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay! |
||||
|
|
||||
|
CALL_CL_GUARDED(clGetDeviceInfo, |
||||
|
(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size)); |
||||
|
|
||||
|
printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie |
||||
|
for (size_t i = 0; i < size/sizeof(size_t); ++i) |
||||
|
printf("%zd ", res_vec[i]); |
||||
|
printf("\n"); |
||||
|
} |
||||
|
printf("---------------------------------------------------------------------\n"); |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
void print_device_info_from_queue(cl_command_queue queue) |
||||
|
{ |
||||
|
cl_device_id dev; |
||||
|
CALL_CL_GUARDED(clGetCommandQueueInfo, |
||||
|
(queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL)); |
||||
|
|
||||
|
print_device_info(dev); |
||||
|
} |
||||
@ -0,0 +1,250 @@ |
|||||
|
/* |
||||
|
* Copyright (c) 2010, 2012 Andreas Kloeckner |
||||
|
* |
||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
* of this software and associated documentation files (the "Software"), to deal |
||||
|
* in the Software without restriction, including without limitation the rights |
||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
* copies of the Software, and to permit persons to whom the Software is |
||||
|
* furnished to do so, subject to the following conditions: |
||||
|
* |
||||
|
* The above copyright notice and this permission notice shall be included in |
||||
|
* all copies or substantial portions of the Software. |
||||
|
* |
||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
* THE SOFTWARE. |
||||
|
*/ |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
#ifndef NYUHPC_CL_HELPER |
||||
|
#define NYUHPC_CL_HELPER |
||||
|
|
||||
|
#include <stdarg.h> |
||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
|
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
/* An error check macro for OpenCL. |
||||
|
* |
||||
|
* Usage: |
||||
|
* CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name") |
||||
|
* |
||||
|
* It will abort with a message if an error occurred. |
||||
|
*/ |
||||
|
|
||||
|
#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \ |
||||
|
if ((STATUS_CODE) != CL_SUCCESS) \ |
||||
|
{ \ |
||||
|
fprintf(stderr, \ |
||||
|
"*** '%s' in '%s' on line %d failed with error '%s'.\n", \ |
||||
|
WHAT, __FILE__, __LINE__, \ |
||||
|
cl_error_to_str(STATUS_CODE)); \ |
||||
|
abort(); \ |
||||
|
} |
||||
|
|
||||
|
/* A more automated error check macro for OpenCL, for use with clXxxx |
||||
|
* functions that return status codes. (Not all of them do, notably |
||||
|
* clCreateXxx do not.) |
||||
|
* |
||||
|
* Usage: |
||||
|
* CALL_CL_GUARDED(clFunction, (arg1, arg2)); |
||||
|
* |
||||
|
* Note the slightly strange comma between the function name and the |
||||
|
* argument list. |
||||
|
*/ |
||||
|
|
||||
|
#define CALL_CL_GUARDED(NAME, ARGLIST) \ |
||||
|
{ \ |
||||
|
cl_int status_code; \ |
||||
|
status_code = NAME ARGLIST; \ |
||||
|
CHECK_CL_ERROR(status_code, #NAME); \ |
||||
|
} |
||||
|
|
||||
|
/* An error check macro for Unix system functions. If "COND" is true, then the |
||||
|
* last system error ("errno") is printed along with MSG, which is supposed to |
||||
|
* be a string describing what you were doing. |
||||
|
* |
||||
|
* Example: |
||||
|
* CHECK_SYS_ERROR(dave != 0, "opening hatch"); |
||||
|
*/ |
||||
|
#define CHECK_SYS_ERROR(COND, MSG) \ |
||||
|
if (COND) \ |
||||
|
{ \ |
||||
|
perror(MSG); \ |
||||
|
abort(); \ |
||||
|
} |
||||
|
|
||||
|
/* Return a string describing the OpenCL error code 'e'. |
||||
|
*/ |
||||
|
const char *cl_error_to_str(cl_int e); |
||||
|
|
||||
|
/* Print a list of available OpenCL platforms and devices |
||||
|
* to standard output. |
||||
|
*/ |
||||
|
void print_platforms_devices(); |
||||
|
|
||||
|
/* Create an OpenCL context and a matching command queue on a platform from a |
||||
|
* vendor whose name contains 'plat_name' on a device whose name contains |
||||
|
* 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no |
||||
|
* preference in the matter. |
||||
|
* |
||||
|
* If multiple devices match both 'plat_name' and 'dev_name', then 'idx' |
||||
|
* prescribes the number of the device that should be chosen. |
||||
|
* |
||||
|
* You may also use the special value CHOOSE_INTERACTIVELY to offer the user |
||||
|
* a choice. You should use this value for code you turn in. |
||||
|
* |
||||
|
* This function always succeeds. (If an error occurs, the program |
||||
|
* is aborted. |
||||
|
* |
||||
|
* You can force interactive querying by defining the |
||||
|
* CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c. |
||||
|
* You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE |
||||
|
* compiler option. |
||||
|
*/ |
||||
|
extern const char *CHOOSE_INTERACTIVELY; |
||||
|
void create_context_on(const char *plat_name, const char*dev_name, cl_uint |
||||
|
idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling); |
||||
|
|
||||
|
/* Read contents of file 'filename'. |
||||
|
* Return as a new string. You must free the string when you're done with it. |
||||
|
* |
||||
|
* This function always succeeds. (If an error occurs, the program |
||||
|
* is aborted. |
||||
|
*/ |
||||
|
char *read_file(const char *filename); |
||||
|
|
||||
|
/* Create a new OpenCL kernel from the code in the string 'knl'. |
||||
|
* 'knl_name' is the name of the kernel function, and 'options', |
||||
|
* if not NULL, is a string containing compiler flags. |
||||
|
* |
||||
|
* You must release the resulting kernel when you're done |
||||
|
* with it. |
||||
|
* |
||||
|
* This function always succeeds. (If an error occurs, the program |
||||
|
* is aborted. |
||||
|
*/ |
||||
|
cl_kernel kernel_from_string(cl_context ctx, |
||||
|
char const *knl, char const *knl_name, char const *options); |
||||
|
|
||||
|
/* Print information about a device, found from either the |
||||
|
* queue or the device_id. |
||||
|
*/ |
||||
|
void print_device_info(cl_device_id device); |
||||
|
void print_device_info_from_queue(cl_command_queue queue); |
||||
|
|
||||
|
#define SET_1_KERNEL_ARG(knl, arg0) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); |
||||
|
|
||||
|
#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); |
||||
|
|
||||
|
#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); |
||||
|
|
||||
|
#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); |
||||
|
|
||||
|
#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); |
||||
|
|
||||
|
#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); |
||||
|
|
||||
|
#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); |
||||
|
|
||||
|
#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); |
||||
|
|
||||
|
#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); |
||||
|
|
||||
|
#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); |
||||
|
|
||||
|
#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); |
||||
|
|
||||
|
#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); \ |
||||
|
CALL_CL_GUARDED(clSetKernelArg, (knl, 11, sizeof(arg11), &arg11)); |
||||
|
|
||||
|
#endif |
||||
@ -0,0 +1,7 @@ |
|||||
|
#include "cl-helper.h" |
||||
|
|
||||
|
int main(int argc, char **argv) |
||||
|
{ |
||||
|
print_platforms_devices(); |
||||
|
return 0; |
||||
|
} |
||||
@ -0,0 +1,7 @@ |
|||||
|
#! /bin/bash |
||||
|
|
||||
|
NCPUS=$(grep processor /proc/cpuinfo | wc -l) |
||||
|
for i in $(seq 0 $((NCPUS-1)) ); do |
||||
|
echo $i |
||||
|
cpufreq-set -g $1 -c $i |
||||
|
done |
||||
@ -0,0 +1,2 @@ |
|||||
|
#! /bin/sh |
||||
|
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq |
||||
@ -0,0 +1,54 @@ |
|||||
|
#ifdef __APPLE__ |
||||
|
|
||||
|
#include <sys/time.h> |
||||
|
|
||||
|
typedef struct timeval timestamp_type; |
||||
|
|
||||
|
static void get_timestamp(timestamp_type *t) |
||||
|
{ |
||||
|
gettimeofday(t, NULL); |
||||
|
} |
||||
|
|
||||
|
static double timestamp_diff_in_seconds(timestamp_type start, |
||||
|
timestamp_type end) |
||||
|
{ |
||||
|
/* Perform the carry for the later subtraction by updating start. */ |
||||
|
if (end.tv_usec < start.tv_usec) { |
||||
|
int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1; |
||||
|
start.tv_usec -= 1000000 * nsec; |
||||
|
start.tv_sec += nsec; |
||||
|
} |
||||
|
if (end.tv_usec - start.tv_usec > 1000000) { |
||||
|
int nsec = (end.tv_usec - start.tv_usec) / 1000000; |
||||
|
start.tv_usec += 1000000 * nsec; |
||||
|
start.tv_sec -= nsec; |
||||
|
} |
||||
|
|
||||
|
return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6; |
||||
|
} |
||||
|
|
||||
|
#else |
||||
|
|
||||
|
#include <time.h> |
||||
|
|
||||
|
typedef struct timespec timestamp_type; |
||||
|
|
||||
|
static void get_timestamp(timestamp_type *t) |
||||
|
{ |
||||
|
clock_gettime(CLOCK_REALTIME, t); |
||||
|
} |
||||
|
|
||||
|
static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end) |
||||
|
{ |
||||
|
struct timespec temp; |
||||
|
if ((end.tv_nsec-start.tv_nsec)<0) { |
||||
|
temp.tv_sec = end.tv_sec-start.tv_sec-1; |
||||
|
temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; |
||||
|
} else { |
||||
|
temp.tv_sec = end.tv_sec-start.tv_sec; |
||||
|
temp.tv_nsec = end.tv_nsec-start.tv_nsec; |
||||
|
} |
||||
|
return temp.tv_sec + 1e-9*temp.tv_nsec; |
||||
|
} |
||||
|
|
||||
|
#endif |
||||
@ -0,0 +1,12 @@ |
|||||
|
#pragma OPENCL EXTENSION cl_khr_fp64: enable |
||||
|
|
||||
|
__kernel void sum( |
||||
|
__global const float *a, |
||||
|
__global const float *b, |
||||
|
__global float *c, |
||||
|
long n) |
||||
|
{ |
||||
|
int gid = get_global_id(0); |
||||
|
if (gid < n) |
||||
|
c[gid] = a[gid] + b[gid]; |
||||
|
} |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue