5 changed files with 323 additions and 9 deletions
-
11.gitignore
-
12Makefile
-
28README.md
-
210createclbin.c
-
71part6bin.c
@ -1 +1,10 @@ |
|||
part* |
|||
part1 |
|||
part2 |
|||
part3 |
|||
part4 |
|||
part4.clbin |
|||
part5 |
|||
part6 |
|||
part8 |
|||
createclbin |
|||
part6bin |
|||
@ -1,16 +1,18 @@ |
|||
CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include |
|||
LIBS += -lcl -L/usr/local/lib64/beignet |
|||
CFLAGS += -O0 -Werror -ggdb -std=c99 |
|||
LIBS += -lOpenCL |
|||
CC = cc |
|||
|
|||
BINARIES = part1 part2 part3 part4 part5 part6 part8 |
|||
BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin |
|||
|
|||
all: $(BINARIES) |
|||
|
|||
%: %.c |
|||
$(CC) $(CFLAGS) $(LIBS) -o $@ $< |
|||
strip $@ |
|||
|
|||
# Generate the precompiled kernel binary by running the createclbin tool,
# which reads part4.cl and writes part4.clbin into the current directory.
# The explicit ./ is needed because the current directory is normally not
# part of PATH.
part4.clbin: createclbin part4.cl
	./createclbin
|||
|
|||
.PHONY: clean |
|||
|
|||
clean: |
|||
rm $(BINARIES) |
|||
@rm -f $(BINARIES) part4.clbin |
|||
@ -1,4 +1,26 @@ |
|||
OpenCL tutorial notes |
|||
===================== |
|||
# OpenCL cookbook code |
|||
|
|||
URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ |
|||
These are some code examples from the |
|||
[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). |
|||
|
|||
## Description |
|||
|
|||
Basically I just took the examples from that tutorial and put them into a |
|||
repo. |
|||
|
|||
## Requirements |
|||
|
|||
Some OpenCL-capable hardware and the corresponding OpenCL library exposing the |
|||
OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT |
|||
Integrated Graphics Controller (rev 09)) with the |
|||
[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) |
|||
open source library. |
|||
|
|||
## License |
|||
|
|||
As far as I can tell, the code is free of any license. Its purpose is just to |
|||
demonstrate OpenCL. |
|||
|
|||
## Author |
|||
|
|||
Dhruba Bandopadhyay |
|||
@ -0,0 +1,210 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
|
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part4.cl" |
|||
|
|||
size_t |
|||
clPutProgramBinaryToFile( |
|||
const char * const filename, |
|||
const cl_program * const program) |
|||
{ |
|||
cl_int cl_status; |
|||
|
|||
cl_uint num_devices; |
|||
cl_status = clGetProgramInfo( |
|||
*program, |
|||
CL_PROGRAM_NUM_DEVICES, |
|||
sizeof(cl_uint), |
|||
&num_devices, |
|||
NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
return 0; |
|||
} |
|||
|
|||
cl_device_id devices[num_devices]; |
|||
cl_status = |
|||
clGetProgramInfo( |
|||
*program, |
|||
CL_PROGRAM_DEVICES, |
|||
sizeof(cl_device_id) * num_devices, |
|||
devices, |
|||
NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
return 0; |
|||
} |
|||
|
|||
size_t binary_size[num_devices]; |
|||
cl_status = |
|||
clGetProgramInfo( |
|||
*program, |
|||
CL_PROGRAM_BINARY_SIZES, |
|||
sizeof(size_t) * num_devices, |
|||
binary_size, |
|||
NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
return 0; |
|||
} |
|||
|
|||
unsigned char * binaries[num_devices]; |
|||
for (cl_uint i = 0; i < num_devices; i++) { |
|||
binaries[i] = (unsigned char *) malloc(binary_size[i]); |
|||
} |
|||
cl_status = |
|||
clGetProgramInfo( |
|||
*program, |
|||
CL_PROGRAM_BINARIES, |
|||
sizeof(unsigned char *) * num_devices, |
|||
binaries, |
|||
NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
for (cl_uint i = 0; i < num_devices; i++) { |
|||
free(binaries[i]); |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
FILE * handle = fopen(filename, "wb"); |
|||
size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); |
|||
|
|||
for (cl_uint i = 0; i < num_devices; i++) { |
|||
free(binaries[i]); |
|||
} |
|||
fclose(handle); |
|||
|
|||
return size; |
|||
} |
|||
|
|||
size_t |
|||
clGetProgramFromSourceFile( |
|||
const char * const filename, |
|||
const cl_context * const context, |
|||
cl_program * const program) |
|||
{ |
|||
/* |
|||
* Get a build OpenCL program from source |
|||
*/ |
|||
FILE * handle; |
|||
char * buffer; |
|||
size_t size; |
|||
|
|||
cl_int cl_status; |
|||
cl_uint num_devices; |
|||
|
|||
// get size of kernel source |
|||
handle = fopen(filename, "r"); |
|||
fseek(handle, 0, SEEK_END); |
|||
size = ftell(handle); |
|||
rewind(handle); |
|||
|
|||
// read kernel source into buffer |
|||
buffer = (char*) malloc(size + 1); |
|||
buffer[size] = '\0'; |
|||
|
|||
if (size != fread(buffer, sizeof(char), size, handle)) |
|||
{ |
|||
fclose(handle); |
|||
free(buffer); |
|||
return 0; |
|||
} |
|||
|
|||
fclose(handle); |
|||
|
|||
// create and build program |
|||
*program = clCreateProgramWithSource( |
|||
*context, 1, (const char**) &buffer, &size, &cl_status); |
|||
|
|||
free(buffer); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
return 0; |
|||
} |
|||
|
|||
cl_status = clGetContextInfo( |
|||
*context, |
|||
CL_CONTEXT_NUM_DEVICES, |
|||
sizeof(cl_uint), |
|||
&num_devices, |
|||
NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
clReleaseProgram(*program); |
|||
return 0; |
|||
} |
|||
|
|||
cl_device_id devices[num_devices]; |
|||
|
|||
cl_status = clGetContextInfo( |
|||
*context, |
|||
CL_CONTEXT_DEVICES, |
|||
sizeof(cl_device_id) * num_devices, |
|||
devices, |
|||
NULL); |
|||
|
|||
cl_status = clBuildProgram( |
|||
*program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); |
|||
|
|||
if (cl_status != CL_SUCCESS) { |
|||
clReleaseProgram(*program); |
|||
return 0; |
|||
} |
|||
|
|||
return size; |
|||
} |
|||
|
|||
int |
|||
clInit(cl_context * const context) |
|||
{ |
|||
/* |
|||
* TODO add failure handling |
|||
*/ |
|||
cl_platform_id platform; |
|||
cl_uint num_devices; |
|||
|
|||
// get first available sdk and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); |
|||
printf("%u devices during init.\n", num_devices); |
|||
cl_device_id devices[num_devices]; |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); |
|||
*context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
int main() |
|||
{ |
|||
cl_int cl_status; |
|||
cl_context context; |
|||
cl_program program; |
|||
|
|||
size_t sourceSize; |
|||
size_t count; |
|||
|
|||
clInit(&context); |
|||
sourceSize = |
|||
clGetProgramFromSourceFile(KERNEL, &context, &program); |
|||
|
|||
assert(sourceSize != 0); |
|||
|
|||
count = clPutProgramBinaryToFile(KERNEL "bin", &program); |
|||
|
|||
assert(count != 0); |
|||
|
|||
clReleaseProgram(program); |
|||
clReleaseContext(context); |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,71 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part4.clbin" |
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; cl_device_id device; cl_context context; |
|||
cl_program program; cl_kernel kernel; cl_command_queue queue; |
|||
cl_mem kernelBuffer; |
|||
|
|||
FILE* programHandle; char *programBuffer; char *programLog; |
|||
size_t programSize; char hostBuffer[32]; |
|||
|
|||
// get first available sdk and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get size of kernel source |
|||
programHandle = fopen(KERNEL, "rb"); |
|||
fseek(programHandle, 0, SEEK_END); |
|||
programSize = ftell(programHandle); |
|||
rewind(programHandle); |
|||
|
|||
// read kernel source into buffer |
|||
programBuffer = (char*) malloc(programSize + 1); |
|||
programBuffer[programSize] = '\0'; |
|||
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
|||
|
|||
fclose(programHandle); |
|||
|
|||
// create and build program |
|||
program = clCreateProgramWithBinary(context, 1, &device, |
|||
(const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); |
|||
free(programBuffer); |
|||
|
|||
// create kernel and command queue |
|||
kernel = clCreateKernel(program, "hello", NULL); |
|||
queue = clCreateCommandQueue(context, device, 0, NULL); |
|||
|
|||
// create kernel argument buffer and set it into kernel |
|||
kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
|||
32 * sizeof(char), NULL, NULL); |
|||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); |
|||
|
|||
// execute kernel, read back the output and print to screen |
|||
clEnqueueTask(queue, kernel, 0, NULL, NULL); |
|||
clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, |
|||
32 * sizeof(char), hostBuffer, 0, NULL, NULL); |
|||
puts(hostBuffer); |
|||
|
|||
clFlush(queue); |
|||
clFinish(queue); |
|||
clReleaseKernel(kernel); |
|||
clReleaseProgram(program); |
|||
clReleaseMemObject(kernelBuffer); |
|||
clReleaseCommandQueue(queue); |
|||
clReleaseContext(context); |
|||
|
|||
return 0; |
|||
|
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue