Browse Source

checkin of the relevant examples of the OpenCL cookbook

master
Georg Hopp 11 years ago
commit
750a8ce92e
  1. 16
      Makefile
  2. 4
      README.md
  3. 60
      part1.c
  4. 78
      part2.c
  5. 49
      part3.c
  6. 57
      part4.c
  7. 20
      part4.cl
  8. 71
      part5.c
  9. 72
      part6.c
  10. 83
      part8.c
  11. 11
      part8.cl

16
Makefile

@ -0,0 +1,16 @@
# Builds the OpenCL cookbook examples: every partN binary is compiled from
# the single source file partN.c.
CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include
# Search path first, then the OpenCL library (libcl) found in that directory.
LIBS += -L/usr/local/lib64/beignet -lcl
CC = cc
BINARIES = part1 part2 part3 part4 part5 part6 part8

# Neither target names a real file.
.PHONY: all clean

all: $(BINARIES)

# Libraries are listed after the source file: linkers resolve symbols from
# left to right, so a library named before its user may be skipped.
%: %.c
	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
	strip $@

# -f keeps "make clean" from failing when some binaries were never built.
clean:
	rm -f $(BINARIES)

4
README.md

@ -0,0 +1,4 @@
OpenCL tutorial notes
=====================
URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/

60
part1.c

@ -0,0 +1,60 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
int main() {
int i, j;
char* info;
size_t infoSize;
cl_uint platformCount;
cl_platform_id *platforms;
const char* attributeNames[5] = {
"Name", "Vendor", "Version", "Profile", "Extensions" };
const cl_platform_info attributeTypes[5] = {
CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION,
CL_PLATFORM_PROFILE, CL_PLATFORM_EXTENSIONS };
const int attributeCount = sizeof(attributeNames) / sizeof(char*);
// get platform count
clGetPlatformIDs(5, NULL, &platformCount);
// get all platforms
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
clGetPlatformIDs(platformCount, platforms, NULL);
// for each platform print all attributes
for (i=0; i<platformCount; i++) {
printf("\n %d. Platform \n", i+1);
for (j=0; j<attributeCount; j++) {
// get platform attribute value size
clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize);
info = (char*) malloc(infoSize);
// get platform attribute value
clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info, NULL);
printf(" %d.%d %-11s: %s\n", i+1, j+1, attributeNames[j], info);
free(info);
}
printf("\n");
}
free(platforms);
return 0;
}
// vim: set ft=c ts=4 sw=4:

78
part2.c

@ -0,0 +1,78 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
int main() {
int i, j;
char* value;
size_t valueSize;
cl_uint platformCount;
cl_platform_id* platforms;
cl_uint deviceCount;
cl_device_id* devices;
cl_uint maxComputeUnits;
// get all platforms
clGetPlatformIDs(0, NULL, &platformCount);
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
clGetPlatformIDs(platformCount, platforms, NULL);
for (i = 0; i < platformCount; i++) {
// get all devices
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
// for each device print critical attributes
for (j = 0; j < deviceCount; j++) {
// print device name
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
printf("%d. Device: %s\n", j+1, value);
free(value);
// print hardware device version
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
printf(" %d.%d Hardware version: %s\n", j+1, 1, value);
free(value);
// print software driver version
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
printf(" %d.%d Software version: %s\n", j+1, 2, value);
free(value);
// print c version supported by compiler for device
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
printf(" %d.%d OpenCL C version: %s\n", j+1, 3, value);
free(value);
// print parallel compute units
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(maxComputeUnits), &maxComputeUnits, NULL);
printf(" %d.%d Parallel compute units: %d\n", j+1, 4, maxComputeUnits);
}
free(devices);
}
free(platforms);
return 0;
}
// vim: set ft=c ts=4 sw=4:

49
part3.c

@ -0,0 +1,49 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
int main() {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_uint refCount;
// get first available platform
clGetPlatformIDs(1, &platform, NULL);
// get first available gpu device
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
// create context
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get context reference count
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
sizeof(refCount), &refCount, NULL);
printf("Ref count: %u ", refCount);
// increment reference count
clRetainContext(context);
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
sizeof(refCount), &refCount, NULL);
printf(">> %u ", refCount);
// decrement reference count
clReleaseContext(context);
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
sizeof(refCount), &refCount, NULL);
printf(">> %u ", refCount);
// finally release context
clReleaseContext(context);
printf(">> 0\n");
return 0;
}
// vim: set ft=c ts=4 sw=4:

57
part4.c

@ -0,0 +1,57 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define KERNEL "part4.cl"
int main() {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_program program;
FILE* programHandle;
size_t programSize, kernelSourceSize;
char *programBuffer, *kernelSource;
// get first available platform and gpu and create context
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get size of kernel source
programHandle = fopen(KERNEL, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
// read kernel source into buffer
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle));
fclose(programHandle);
// create program from buffer
program = clCreateProgramWithSource(context, 1,
(const char**) &programBuffer, &programSize, NULL);
free(programBuffer);
// read kernel source back in from program to check
clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &kernelSourceSize);
kernelSource = (char*) malloc(kernelSourceSize);
clGetProgramInfo(program, CL_PROGRAM_SOURCE, kernelSourceSize, kernelSource, NULL);
printf("\nKernel source:\n\n%s\n", kernelSource);
free(kernelSource);
clReleaseContext(context);
return 0;
}
// vim: set ft=c ts=4 sw=4:

20
part4.cl

@ -0,0 +1,20 @@
/*
 * Stores the NUL-terminated text "Hello, World!" (14 bytes including the
 * terminator) at the start of `string`.
 */
__kernel void hello(__global char* string) {
    const char greeting[14] = {
        'H', 'e', 'l', 'l', 'o', ',', ' ',
        'W', 'o', 'r', 'l', 'd', '!', 0
    };

    // copy the greeting byte by byte, terminator included
    for (int pos = 0; pos < 14; pos++) {
        string[pos] = greeting[pos];
    }
}
// vim: ft=c ts=4 sw=4:

71
part5.c

@ -0,0 +1,71 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define KERNEL "part4.cl"
int main() {
cl_platform_id platform; cl_device_id device; cl_context context;
cl_program program; cl_int error; cl_build_status status;
FILE* programHandle;
char *programBuffer; char *programLog;
size_t programSize; size_t logSize;
// get first available platform and gpu and create context
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get size of kernel source
programHandle = fopen(KERNEL, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
// read kernel source into buffer
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle));
fclose(programHandle);
// create program from buffer
program = clCreateProgramWithSource(context, 1,
(const char**) &programBuffer, &programSize, NULL);
free(programBuffer);
// build program
const char options[] = "-Werror -cl-std=CL1.1";
error = clBuildProgram(program, 1, &device, options, NULL, NULL);
// build failed
if (error != CL_SUCCESS) {
// check build error and build status first
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status), &status, NULL);
// check build log
clGetProgramBuildInfo(program, device,
CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
programLog = (char*) calloc (logSize+1, sizeof(char));
clGetProgramBuildInfo(program, device,
CL_PROGRAM_BUILD_LOG, logSize+1, programLog, NULL);
printf("Build failed; error=%d, status=%d, programLog:\n\n%s",
error, status, programLog);
free(programLog);
}
clReleaseContext(context);
return 0;
}
// vim: set ts=4 sw=4:

72
part6.c

@ -0,0 +1,72 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define KERNEL "part4.cl"
int main() {
cl_platform_id platform; cl_device_id device; cl_context context;
cl_program program; cl_kernel kernel; cl_command_queue queue;
cl_mem kernelBuffer;
FILE* programHandle; char *programBuffer; char *programLog;
size_t programSize; char hostBuffer[32];
// get first available sdk and gpu and create context
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get size of kernel source
programHandle = fopen(KERNEL, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
// read kernel source into buffer
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle));
fclose(programHandle);
// create and build program
program = clCreateProgramWithSource(context, 1,
(const char**) &programBuffer, &programSize, NULL);
free(programBuffer);
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL);
// create kernel and command queue
kernel = clCreateKernel(program, "hello", NULL);
queue = clCreateCommandQueue(context, device, 0, NULL);
// create kernel argument buffer and set it into kernel
kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
32 * sizeof(char), NULL, NULL);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer);
// execute kernel, read back the output and print to screen
clEnqueueTask(queue, kernel, 0, NULL, NULL);
clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0,
32 * sizeof(char), hostBuffer, 0, NULL, NULL);
puts(hostBuffer);
clFlush(queue);
clFinish(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(kernelBuffer);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
// vim: set ft=c ts=4 sw=4:

83
part8.c

@ -0,0 +1,83 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define KERNEL "part8.cl"
/*
 * Prints one line "CPU i,j,k" for every index triple with 0 <= i < x,
 * 0 <= j < y and 0 <= k < z. The last index varies fastest. Nothing is
 * printed when any of the three bounds is zero or negative.
 */
void cpu_3d_loop (int x, int y, int z) {
    int outer = 0;

    while (outer < x) {
        int middle = 0;

        while (middle < y) {
            int inner = 0;

            while (inner < z) {
                printf("CPU %d,%d,%d\n", outer, middle, inner);
                inner++;
            }
            middle++;
        }
        outer++;
    }
}
int main() {
cl_platform_id platform; cl_device_id device; cl_context context;
cl_program program; cl_kernel kernel; cl_command_queue queue;
cl_mem kernelBuffer;
FILE* programHandle; char *programBuffer; char *programLog;
size_t programSize; char hostBuffer[32];
int x = 4;
int y = 3;
int z = 2;
cpu_3d_loop(x, y, z);
// get first available sdk and gpu and create context
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get size of kernel source
programHandle = fopen(KERNEL, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
// read kernel source into buffer
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle));
fclose(programHandle);
// create and build program
program = clCreateProgramWithSource(context, 1,
(const char**) &programBuffer, &programSize, NULL);
free(programBuffer);
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL);
// create kernel and command queue
kernel = clCreateKernel(program, "ndrange_parallelism", NULL);
queue = clCreateCommandQueue(context, device, 0, NULL);
size_t globalWorkSize[3] = {x, y, z};
// execute kernel, read back the output and print to screen
clEnqueueNDRangeKernel(
queue, kernel, 3, NULL, globalWorkSize, NULL, 0, NULL, NULL);
clFlush(queue);
clFinish(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(kernelBuffer);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
// vim: set ft=c ts=4 sw=4:

11
part8.cl

@ -0,0 +1,11 @@
/*
 * Prints the global id of the executing work-item in dimensions 0, 1 and 2
 * as "GPU i,j,k".
 */
__kernel void ndrange_parallelism () {
    // the ids are narrowed to int to match the %d conversions
    printf("GPU %d,%d,%d\n",
           (int) get_global_id(0),
           (int) get_global_id(1),
           (int) get_global_id(2));
}
// vim: set ft=c ts=4 sw=4:
Loading…
Cancel
Save