diff --git a/.gitignore b/.gitignore index f100b80..3612461 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,10 @@ -part* +part1 +part2 +part3 +part4 +part4.clbin +part5 +part6 +part8 +createclbin +part6bin diff --git a/Makefile b/Makefile index ec1d749..ff7e653 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,18 @@ -CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include -LIBS += -lcl -L/usr/local/lib64/beignet +CFLAGS += -O0 -Werror -ggdb -std=c99 +LIBS += -lOpenCL CC = cc -BINARIES = part1 part2 part3 part4 part5 part6 part8 +BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin all: $(BINARIES) %: %.c $(CC) $(CFLAGS) $(LIBS) -o $@ $< - strip $@ + +part4.clbin: createclbin + createclbin .PHONY: clean clean: - rm $(BINARIES) + @rm -f $(BINARIES) part4.clbin diff --git a/README.md b/README.md index b92eb9a..5693aea 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,26 @@ -OpenCL tutorial notes -===================== +# OpenCL cookbook code -URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ +These are some code examples from the +[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). + +## Description + +Basically I just took the examples from that tutorial and put them into a +repo. + +## Requirements + +Some OpenCL capable hardware and the corresponding OpenCL library exposing the +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT +Integrated Graphics Controller (rev 09)) with the +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) +open source library. + +## License + +As far as I can tell the code is free of any license. Its purpose is just to +demonstrate OpenCL. 
+ +## Author + +Dhruba Bandopadhyay diff --git a/createclbin.c b/createclbin.c new file mode 100644 index 0000000..3729f36 --- /dev/null +++ b/createclbin.c @@ -0,0 +1,210 @@ +#include +#include +#include + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#define KERNEL "part4.cl" +/* Writes the binary of the program's first device to `filename`; returns the number of bytes written, or 0 when a clGetProgramInfo query fails. */ +size_t +clPutProgramBinaryToFile( + const char * const filename, + const cl_program * const program) +{ + cl_int cl_status; + + cl_uint num_devices; + cl_status = clGetProgramInfo( + *program, + CL_PROGRAM_NUM_DEVICES, + sizeof(cl_uint), + &num_devices, + NULL); + + if (cl_status != CL_SUCCESS) { + return 0; + } + + cl_device_id devices[num_devices]; + cl_status = + clGetProgramInfo( + *program, + CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * num_devices, + devices, + NULL); + + if (cl_status != CL_SUCCESS) { + return 0; + } + + size_t binary_size[num_devices]; + cl_status = + clGetProgramInfo( + *program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * num_devices, + binary_size, + NULL); + + if (cl_status != CL_SUCCESS) { + return 0; + } + + unsigned char * binaries[num_devices]; + for (cl_uint i = 0; i < num_devices; i++) { + binaries[i] = (unsigned char *) malloc(binary_size[i]); + } + cl_status = + clGetProgramInfo( + *program, + CL_PROGRAM_BINARIES, + sizeof(unsigned char *) * num_devices, + binaries, + NULL); + + if (cl_status != CL_SUCCESS) { + for (cl_uint i = 0; i < num_devices; i++) { + free(binaries[i]); + } + return 0; + } + /* NOTE(review): the malloc() results above and the fopen() result below are used unchecked; every device binary is fetched but only binaries[0] is written. */ + FILE * handle = fopen(filename, "wb"); + size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); + + for (cl_uint i = 0; i < num_devices; i++) { + free(binaries[i]); + } + fclose(handle); + + return size; +} +/* Reads `filename`, creates a program from its text in `*context` and builds it; returns the source size in bytes, or 0 on failure. */ +size_t +clGetProgramFromSourceFile( + const char * const filename, + const cl_context * const context, + cl_program * const program) +{ + /* + * Get a built OpenCL program from source + */ + FILE * handle; + char * buffer; + size_t size; + + cl_int cl_status; + cl_uint num_devices; + + // get size of kernel source 
+ handle = fopen(filename, "r"); + fseek(handle, 0, SEEK_END); + size = ftell(handle); + rewind(handle); + /* NOTE(review): neither fopen() above nor malloc() below is checked for NULL. */ + // read kernel source into buffer + buffer = (char*) malloc(size + 1); + buffer[size] = '\0'; + + if (size != fread(buffer, sizeof(char), size, handle)) + { + fclose(handle); + free(buffer); + return 0; + } + + fclose(handle); + + // create and build program + *program = clCreateProgramWithSource( + *context, 1, (const char**) &buffer, &size, &cl_status); + + free(buffer); + + if (cl_status != CL_SUCCESS) { + return 0; + } + + cl_status = clGetContextInfo( + *context, + CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), + &num_devices, + NULL); + + if (cl_status != CL_SUCCESS) { + clReleaseProgram(*program); + return 0; + } + + cl_device_id devices[num_devices]; + + cl_status = clGetContextInfo( + *context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * num_devices, + devices, + NULL); + /* NOTE(review): the clGetContextInfo status above is overwritten without being checked; only devices[0] is passed to clBuildProgram. */ + cl_status = clBuildProgram( + *program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); + + if (cl_status != CL_SUCCESS) { + clReleaseProgram(*program); + return 0; + } + + return size; +} +/* Creates a context over the GPU devices reported by the first platform and stores it in `*context`; always returns 0. */ +int +clInit(cl_context * const context) +{ + /* + * TODO add failure handling + */ + cl_platform_id platform; + cl_uint num_devices; + + // get first available sdk and gpu and create context + clGetPlatformIDs(1, &platform, NULL); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); + printf("%u devices during init.\n", num_devices); + cl_device_id devices[num_devices]; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); + *context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); + + return 0; +} +/* Builds KERNEL from source and writes its binary to the KERNEL name with a bin suffix (part4.clbin); failures abort through assert(). */ +int main() +{ + cl_int cl_status; + cl_context context; + cl_program program; + + size_t sourceSize; + size_t count; + + clInit(&context); + sourceSize = + clGetProgramFromSourceFile(KERNEL, &context, &program); + + assert(sourceSize != 0); + + count = clPutProgramBinaryToFile(KERNEL "bin", &program); + + assert(count != 0); + + 
clReleaseProgram(program); + clReleaseContext(context); + + return 0; +} + +// vim: set ft=c ts=4 sw=4: diff --git a/part6bin.c b/part6bin.c new file mode 100644 index 0000000..0b19604 --- /dev/null +++ b/part6bin.c @@ -0,0 +1,71 @@ +#include +#include +#include +#ifdef __APPLE__ +#include +#else +#include +#endif + +#define KERNEL "part4.clbin" +/* Loads the program binary KERNEL, runs its hello kernel once as a task, and prints the 32-byte result buffer with puts(). */ +int main() { + + cl_platform_id platform; cl_device_id device; cl_context context; + cl_program program; cl_kernel kernel; cl_command_queue queue; + cl_mem kernelBuffer; + + FILE* programHandle; char *programBuffer; char *programLog; + size_t programSize; char hostBuffer[32]; + + // get first available sdk and gpu and create context + clGetPlatformIDs(1, &platform, NULL); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); + context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); + + // get size of kernel binary + programHandle = fopen(KERNEL, "rb"); + fseek(programHandle, 0, SEEK_END); + programSize = ftell(programHandle); + rewind(programHandle); + + // read kernel binary into buffer + programBuffer = (char*) malloc(programSize + 1); + programBuffer[programSize] = '\0'; + assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); + + fclose(programHandle); + + // create program from the binary; NOTE(review): no clBuildProgram call follows - confirm clCreateKernel accepts the unbuilt program + program = clCreateProgramWithBinary(context, 1, &device, + (const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); + free(programBuffer); + + // create kernel and command queue + kernel = clCreateKernel(program, "hello", NULL); + queue = clCreateCommandQueue(context, device, 0, NULL); + + // create kernel argument buffer and set it into kernel + kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + 32 * sizeof(char), NULL, NULL); + clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); + + // execute kernel, read back the output and print to screen + clEnqueueTask(queue, kernel, 0, NULL, NULL); + clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, 
+ 32 * sizeof(char), hostBuffer, 0, NULL, NULL); + puts(hostBuffer); + /* NOTE(review): puts() above assumes the kernel wrote a NUL-terminated string into hostBuffer - confirm against part4.cl. */ + clFlush(queue); + clFinish(queue); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseMemObject(kernelBuffer); + clReleaseCommandQueue(queue); + clReleaseContext(context); + + return 0; + +} + +// vim: set ft=c ts=4 sw=4: