5 changed files with 323 additions and 9 deletions
-
11.gitignore
-
12Makefile
-
28README.md
-
210createclbin.c
-
71part6bin.c
@ -1 +1,10 @@ |
|||||
part* |
|
||||
|
part1 |
||||
|
part2 |
||||
|
part3 |
||||
|
part4 |
||||
|
part4.clbin |
||||
|
part5 |
||||
|
part6 |
||||
|
part8 |
||||
|
createclbin |
||||
|
part6bin |
||||
@ -1,16 +1,18 @@ |
|||||
CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include |
|
||||
LIBS += -lcl -L/usr/local/lib64/beignet |
|
||||
|
CFLAGS += -O0 -Werror -ggdb -std=c99 |
||||
|
LIBS += -lOpenCL |
||||
CC = cc |
CC = cc |
||||
|
|
||||
BINARIES = part1 part2 part3 part4 part5 part6 part8 |
|
||||
|
BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin |
||||
|
|
||||
all: $(BINARIES) |
all: $(BINARIES) |
||||
|
|
||||
%: %.c |
%: %.c |
||||
$(CC) $(CFLAGS) $(LIBS) -o $@ $< |
$(CC) $(CFLAGS) $(LIBS) -o $@ $< |
||||
strip $@ |
|
||||
|
|
||||
|
# Precompiled kernel binary: createclbin reads part4.cl and writes part4.clbin.
# The tool is invoked via ./ because the build directory is normally not in PATH.
part4.clbin: createclbin part4.cl
	./createclbin
||||
|
|
||||
.PHONY: clean |
.PHONY: clean |
||||
|
|
||||
clean: |
clean: |
||||
rm $(BINARIES) |
|
||||
|
@rm -f $(BINARIES) part4.clbin |
||||
@ -1,4 +1,26 @@ |
|||||
OpenCL tutorial notes |
|
||||
===================== |
|
||||
|
# OpenCL cookbook code |
||||
|
|
||||
URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ |
|
||||
|
These are some code examples from the |
||||
|
[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). |
||||
|
|
||||
|
## Description |
||||
|
|
||||
|
Basically I just took the examples from that tutorial and put them into a |
||||
|
repo. |
||||
|
|
||||
|
## Requirements |
||||
|
|
||||
|
Some OpenCL-capable hardware and the corresponding OpenCL library exposing the |
||||
|
OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT |
||||
|
Integrated Graphics Controller (rev 09)) with the |
||||
|
[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) |
||||
|
open source library. |
||||
|
|
||||
|
## License |
||||
|
|
||||
|
As far as I can tell, the code is free of any license. Its purpose is just to |
||||
|
demonstrate OpenCL. |
||||
|
|
||||
|
## Author |
||||
|
|
||||
|
Dhruba Bandopadhyay |
||||
@ -0,0 +1,210 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
|
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part4.cl" |
||||
|
|
||||
|
size_t |
||||
|
clPutProgramBinaryToFile( |
||||
|
const char * const filename, |
||||
|
const cl_program * const program) |
||||
|
{ |
||||
|
cl_int cl_status; |
||||
|
|
||||
|
cl_uint num_devices; |
||||
|
cl_status = clGetProgramInfo( |
||||
|
*program, |
||||
|
CL_PROGRAM_NUM_DEVICES, |
||||
|
sizeof(cl_uint), |
||||
|
&num_devices, |
||||
|
NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
cl_device_id devices[num_devices]; |
||||
|
cl_status = |
||||
|
clGetProgramInfo( |
||||
|
*program, |
||||
|
CL_PROGRAM_DEVICES, |
||||
|
sizeof(cl_device_id) * num_devices, |
||||
|
devices, |
||||
|
NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
size_t binary_size[num_devices]; |
||||
|
cl_status = |
||||
|
clGetProgramInfo( |
||||
|
*program, |
||||
|
CL_PROGRAM_BINARY_SIZES, |
||||
|
sizeof(size_t) * num_devices, |
||||
|
binary_size, |
||||
|
NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
unsigned char * binaries[num_devices]; |
||||
|
for (cl_uint i = 0; i < num_devices; i++) { |
||||
|
binaries[i] = (unsigned char *) malloc(binary_size[i]); |
||||
|
} |
||||
|
cl_status = |
||||
|
clGetProgramInfo( |
||||
|
*program, |
||||
|
CL_PROGRAM_BINARIES, |
||||
|
sizeof(unsigned char *) * num_devices, |
||||
|
binaries, |
||||
|
NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
for (cl_uint i = 0; i < num_devices; i++) { |
||||
|
free(binaries[i]); |
||||
|
} |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
FILE * handle = fopen(filename, "wb"); |
||||
|
size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); |
||||
|
|
||||
|
for (cl_uint i = 0; i < num_devices; i++) { |
||||
|
free(binaries[i]); |
||||
|
} |
||||
|
fclose(handle); |
||||
|
|
||||
|
return size; |
||||
|
} |
||||
|
|
||||
|
size_t |
||||
|
clGetProgramFromSourceFile( |
||||
|
const char * const filename, |
||||
|
const cl_context * const context, |
||||
|
cl_program * const program) |
||||
|
{ |
||||
|
/* |
||||
|
* Get a build OpenCL program from source |
||||
|
*/ |
||||
|
FILE * handle; |
||||
|
char * buffer; |
||||
|
size_t size; |
||||
|
|
||||
|
cl_int cl_status; |
||||
|
cl_uint num_devices; |
||||
|
|
||||
|
// get size of kernel source |
||||
|
handle = fopen(filename, "r"); |
||||
|
fseek(handle, 0, SEEK_END); |
||||
|
size = ftell(handle); |
||||
|
rewind(handle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
buffer = (char*) malloc(size + 1); |
||||
|
buffer[size] = '\0'; |
||||
|
|
||||
|
if (size != fread(buffer, sizeof(char), size, handle)) |
||||
|
{ |
||||
|
fclose(handle); |
||||
|
free(buffer); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
fclose(handle); |
||||
|
|
||||
|
// create and build program |
||||
|
*program = clCreateProgramWithSource( |
||||
|
*context, 1, (const char**) &buffer, &size, &cl_status); |
||||
|
|
||||
|
free(buffer); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
cl_status = clGetContextInfo( |
||||
|
*context, |
||||
|
CL_CONTEXT_NUM_DEVICES, |
||||
|
sizeof(cl_uint), |
||||
|
&num_devices, |
||||
|
NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
clReleaseProgram(*program); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
cl_device_id devices[num_devices]; |
||||
|
|
||||
|
cl_status = clGetContextInfo( |
||||
|
*context, |
||||
|
CL_CONTEXT_DEVICES, |
||||
|
sizeof(cl_device_id) * num_devices, |
||||
|
devices, |
||||
|
NULL); |
||||
|
|
||||
|
cl_status = clBuildProgram( |
||||
|
*program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); |
||||
|
|
||||
|
if (cl_status != CL_SUCCESS) { |
||||
|
clReleaseProgram(*program); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
return size; |
||||
|
} |
||||
|
|
||||
|
int |
||||
|
clInit(cl_context * const context) |
||||
|
{ |
||||
|
/* |
||||
|
* TODO add failure handling |
||||
|
*/ |
||||
|
cl_platform_id platform; |
||||
|
cl_uint num_devices; |
||||
|
|
||||
|
// get first available sdk and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); |
||||
|
printf("%u devices during init.\n", num_devices); |
||||
|
cl_device_id devices[num_devices]; |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); |
||||
|
*context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); |
||||
|
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
int main() |
||||
|
{ |
||||
|
cl_int cl_status; |
||||
|
cl_context context; |
||||
|
cl_program program; |
||||
|
|
||||
|
size_t sourceSize; |
||||
|
size_t count; |
||||
|
|
||||
|
clInit(&context); |
||||
|
sourceSize = |
||||
|
clGetProgramFromSourceFile(KERNEL, &context, &program); |
||||
|
|
||||
|
assert(sourceSize != 0); |
||||
|
|
||||
|
count = clPutProgramBinaryToFile(KERNEL "bin", &program); |
||||
|
|
||||
|
assert(count != 0); |
||||
|
|
||||
|
clReleaseProgram(program); |
||||
|
clReleaseContext(context); |
||||
|
|
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,71 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part4.clbin" |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; cl_device_id device; cl_context context; |
||||
|
cl_program program; cl_kernel kernel; cl_command_queue queue; |
||||
|
cl_mem kernelBuffer; |
||||
|
|
||||
|
FILE* programHandle; char *programBuffer; char *programLog; |
||||
|
size_t programSize; char hostBuffer[32]; |
||||
|
|
||||
|
// get first available sdk and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get size of kernel source |
||||
|
programHandle = fopen(KERNEL, "rb"); |
||||
|
fseek(programHandle, 0, SEEK_END); |
||||
|
programSize = ftell(programHandle); |
||||
|
rewind(programHandle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
programBuffer = (char*) malloc(programSize + 1); |
||||
|
programBuffer[programSize] = '\0'; |
||||
|
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
||||
|
|
||||
|
fclose(programHandle); |
||||
|
|
||||
|
// create and build program |
||||
|
program = clCreateProgramWithBinary(context, 1, &device, |
||||
|
(const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); |
||||
|
free(programBuffer); |
||||
|
|
||||
|
// create kernel and command queue |
||||
|
kernel = clCreateKernel(program, "hello", NULL); |
||||
|
queue = clCreateCommandQueue(context, device, 0, NULL); |
||||
|
|
||||
|
// create kernel argument buffer and set it into kernel |
||||
|
kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
||||
|
32 * sizeof(char), NULL, NULL); |
||||
|
clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); |
||||
|
|
||||
|
// execute kernel, read back the output and print to screen |
||||
|
clEnqueueTask(queue, kernel, 0, NULL, NULL); |
||||
|
clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, |
||||
|
32 * sizeof(char), hostBuffer, 0, NULL, NULL); |
||||
|
puts(hostBuffer); |
||||
|
|
||||
|
clFlush(queue); |
||||
|
clFinish(queue); |
||||
|
clReleaseKernel(kernel); |
||||
|
clReleaseProgram(program); |
||||
|
clReleaseMemObject(kernelBuffer); |
||||
|
clReleaseCommandQueue(queue); |
||||
|
clReleaseContext(context); |
||||
|
|
||||
|
return 0; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue