commit
750a8ce92e
11 changed files with 521 additions and 0 deletions
-
16Makefile
-
4README.md
-
60part1.c
-
78part2.c
-
49part3.c
-
57part4.c
-
20part4.cl
-
71part5.c
-
72part6.c
-
83part8.c
-
11part8.cl
@ -0,0 +1,16 @@ |
|||||
|
# Compiler and linker settings for the OpenCL tutorial programs.
CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include
LIBS += -L/usr/local/lib64/beignet -lcl
CC = cc

BINARIES = part1 part2 part3 part4 part5 part6 part8

# Neither target names a real file.
.PHONY: all clean

all: $(BINARIES)

# Libraries are listed after the source file so that linkers which resolve
# symbols left to right still pull in the OpenCL library.
%: %.c
	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
	strip $@

# -f keeps "make clean" from failing when some binaries were never built.
clean:
	rm -f $(BINARIES)
||||
@ -0,0 +1,4 @@ |
|||||
|
OpenCL tutorial notes |
||||
|
===================== |
||||
|
|
||||
|
URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ |
||||
@ -0,0 +1,60 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
int i, j; |
||||
|
char* info; |
||||
|
size_t infoSize; |
||||
|
|
||||
|
cl_uint platformCount; |
||||
|
cl_platform_id *platforms; |
||||
|
|
||||
|
const char* attributeNames[5] = { |
||||
|
"Name", "Vendor", "Version", "Profile", "Extensions" }; |
||||
|
|
||||
|
const cl_platform_info attributeTypes[5] = { |
||||
|
CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION, |
||||
|
CL_PLATFORM_PROFILE, CL_PLATFORM_EXTENSIONS }; |
||||
|
|
||||
|
const int attributeCount = sizeof(attributeNames) / sizeof(char*); |
||||
|
|
||||
|
// get platform count |
||||
|
clGetPlatformIDs(5, NULL, &platformCount); |
||||
|
// get all platforms |
||||
|
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
||||
|
clGetPlatformIDs(platformCount, platforms, NULL); |
||||
|
|
||||
|
// for each platform print all attributes |
||||
|
for (i=0; i<platformCount; i++) { |
||||
|
|
||||
|
printf("\n %d. Platform \n", i+1); |
||||
|
|
||||
|
for (j=0; j<attributeCount; j++) { |
||||
|
|
||||
|
// get platform attribute value size |
||||
|
clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize); |
||||
|
info = (char*) malloc(infoSize); |
||||
|
|
||||
|
// get platform attribute value |
||||
|
clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info, NULL); |
||||
|
|
||||
|
printf(" %d.%d %-11s: %s\n", i+1, j+1, attributeNames[j], info); |
||||
|
free(info); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
printf("\n"); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
free(platforms); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,78 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
int i, j; |
||||
|
char* value; |
||||
|
size_t valueSize; |
||||
|
cl_uint platformCount; |
||||
|
cl_platform_id* platforms; |
||||
|
cl_uint deviceCount; |
||||
|
cl_device_id* devices; |
||||
|
cl_uint maxComputeUnits; |
||||
|
|
||||
|
// get all platforms |
||||
|
clGetPlatformIDs(0, NULL, &platformCount); |
||||
|
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
||||
|
clGetPlatformIDs(platformCount, platforms, NULL); |
||||
|
|
||||
|
for (i = 0; i < platformCount; i++) { |
||||
|
|
||||
|
// get all devices |
||||
|
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount); |
||||
|
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount); |
||||
|
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL); |
||||
|
|
||||
|
// for each device print critical attributes |
||||
|
for (j = 0; j < deviceCount; j++) { |
||||
|
|
||||
|
// print device name |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize); |
||||
|
value = (char*) malloc(valueSize); |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL); |
||||
|
printf("%d. Device: %s\n", j+1, value); |
||||
|
free(value); |
||||
|
|
||||
|
// print hardware device version |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize); |
||||
|
value = (char*) malloc(valueSize); |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL); |
||||
|
printf(" %d.%d Hardware version: %s\n", j+1, 1, value); |
||||
|
free(value); |
||||
|
|
||||
|
// print software driver version |
||||
|
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize); |
||||
|
value = (char*) malloc(valueSize); |
||||
|
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL); |
||||
|
printf(" %d.%d Software version: %s\n", j+1, 2, value); |
||||
|
free(value); |
||||
|
|
||||
|
// print c version supported by compiler for device |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize); |
||||
|
value = (char*) malloc(valueSize); |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL); |
||||
|
printf(" %d.%d OpenCL C version: %s\n", j+1, 3, value); |
||||
|
free(value); |
||||
|
|
||||
|
// print parallel compute units |
||||
|
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, |
||||
|
sizeof(maxComputeUnits), &maxComputeUnits, NULL); |
||||
|
printf(" %d.%d Parallel compute units: %d\n", j+1, 4, maxComputeUnits); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
free(devices); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
free(platforms); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,49 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; |
||||
|
cl_device_id device; |
||||
|
cl_context context; |
||||
|
cl_uint refCount; |
||||
|
|
||||
|
// get first available platform |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
|
||||
|
// get first available gpu device |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
|
||||
|
// create context |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get context reference count |
||||
|
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
||||
|
sizeof(refCount), &refCount, NULL); |
||||
|
printf("Ref count: %u ", refCount); |
||||
|
|
||||
|
// increment reference count |
||||
|
clRetainContext(context); |
||||
|
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
||||
|
sizeof(refCount), &refCount, NULL); |
||||
|
printf(">> %u ", refCount); |
||||
|
|
||||
|
// decrement reference count |
||||
|
clReleaseContext(context); |
||||
|
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
||||
|
sizeof(refCount), &refCount, NULL); |
||||
|
printf(">> %u ", refCount); |
||||
|
|
||||
|
// finally release context |
||||
|
clReleaseContext(context); |
||||
|
printf(">> 0\n"); |
||||
|
return 0; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,57 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part4.cl" |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; |
||||
|
cl_device_id device; |
||||
|
cl_context context; |
||||
|
cl_program program; |
||||
|
|
||||
|
FILE* programHandle; |
||||
|
size_t programSize, kernelSourceSize; |
||||
|
char *programBuffer, *kernelSource; |
||||
|
|
||||
|
// get first available platform and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get size of kernel source |
||||
|
programHandle = fopen(KERNEL, "r"); |
||||
|
fseek(programHandle, 0, SEEK_END); |
||||
|
programSize = ftell(programHandle); |
||||
|
rewind(programHandle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
programBuffer = (char*) malloc(programSize + 1); |
||||
|
programBuffer[programSize] = '\0'; |
||||
|
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
||||
|
fclose(programHandle); |
||||
|
|
||||
|
// create program from buffer |
||||
|
program = clCreateProgramWithSource(context, 1, |
||||
|
(const char**) &programBuffer, &programSize, NULL); |
||||
|
free(programBuffer); |
||||
|
|
||||
|
// read kernel source back in from program to check |
||||
|
clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &kernelSourceSize); |
||||
|
kernelSource = (char*) malloc(kernelSourceSize); |
||||
|
clGetProgramInfo(program, CL_PROGRAM_SOURCE, kernelSourceSize, kernelSource, NULL); |
||||
|
printf("\nKernel source:\n\n%s\n", kernelSource); |
||||
|
free(kernelSource); |
||||
|
|
||||
|
clReleaseContext(context); |
||||
|
return 0; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,20 @@ |
|||||
|
// Writes the NUL-terminated text "Hello, World!" into the first 14 bytes of
// `string`; the buffer must therefore hold at least 14 chars.
__kernel void hello(__global char* string) {

    // sizeof includes the trailing NUL, which is copied as the last byte
    const char greeting[] = "Hello, World!";

    for (int pos = 0; pos < (int) sizeof(greeting); pos++) {
        string[pos] = greeting[pos];
    }

}
||||
|
|
||||
|
// vim: ft=c ts=4 sw=4: |
||||
@ -0,0 +1,71 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part4.cl" |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; cl_device_id device; cl_context context; |
||||
|
cl_program program; cl_int error; cl_build_status status; |
||||
|
|
||||
|
FILE* programHandle; |
||||
|
char *programBuffer; char *programLog; |
||||
|
size_t programSize; size_t logSize; |
||||
|
|
||||
|
// get first available platform and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get size of kernel source |
||||
|
programHandle = fopen(KERNEL, "r"); |
||||
|
fseek(programHandle, 0, SEEK_END); |
||||
|
programSize = ftell(programHandle); |
||||
|
rewind(programHandle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
programBuffer = (char*) malloc(programSize + 1); |
||||
|
programBuffer[programSize] = '\0'; |
||||
|
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
||||
|
|
||||
|
fclose(programHandle); |
||||
|
|
||||
|
// create program from buffer |
||||
|
program = clCreateProgramWithSource(context, 1, |
||||
|
(const char**) &programBuffer, &programSize, NULL); |
||||
|
free(programBuffer); |
||||
|
|
||||
|
// build program |
||||
|
const char options[] = "-Werror -cl-std=CL1.1"; |
||||
|
error = clBuildProgram(program, 1, &device, options, NULL, NULL); |
||||
|
|
||||
|
// build failed |
||||
|
if (error != CL_SUCCESS) { |
||||
|
|
||||
|
// check build error and build status first |
||||
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, |
||||
|
sizeof(cl_build_status), &status, NULL); |
||||
|
|
||||
|
// check build log |
||||
|
clGetProgramBuildInfo(program, device, |
||||
|
CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); |
||||
|
programLog = (char*) calloc (logSize+1, sizeof(char)); |
||||
|
clGetProgramBuildInfo(program, device, |
||||
|
CL_PROGRAM_BUILD_LOG, logSize+1, programLog, NULL); |
||||
|
printf("Build failed; error=%d, status=%d, programLog:\n\n%s", |
||||
|
error, status, programLog); |
||||
|
free(programLog); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
clReleaseContext(context); |
||||
|
return 0; |
||||
|
} |
||||
|
|
||||
|
// vim: set ts=4 sw=4: |
||||
@ -0,0 +1,72 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part4.cl" |
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; cl_device_id device; cl_context context; |
||||
|
cl_program program; cl_kernel kernel; cl_command_queue queue; |
||||
|
cl_mem kernelBuffer; |
||||
|
|
||||
|
FILE* programHandle; char *programBuffer; char *programLog; |
||||
|
size_t programSize; char hostBuffer[32]; |
||||
|
|
||||
|
// get first available sdk and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get size of kernel source |
||||
|
programHandle = fopen(KERNEL, "r"); |
||||
|
fseek(programHandle, 0, SEEK_END); |
||||
|
programSize = ftell(programHandle); |
||||
|
rewind(programHandle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
programBuffer = (char*) malloc(programSize + 1); |
||||
|
programBuffer[programSize] = '\0'; |
||||
|
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
||||
|
|
||||
|
fclose(programHandle); |
||||
|
|
||||
|
// create and build program |
||||
|
program = clCreateProgramWithSource(context, 1, |
||||
|
(const char**) &programBuffer, &programSize, NULL); |
||||
|
free(programBuffer); |
||||
|
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL); |
||||
|
|
||||
|
// create kernel and command queue |
||||
|
kernel = clCreateKernel(program, "hello", NULL); |
||||
|
queue = clCreateCommandQueue(context, device, 0, NULL); |
||||
|
|
||||
|
// create kernel argument buffer and set it into kernel |
||||
|
kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
||||
|
32 * sizeof(char), NULL, NULL); |
||||
|
clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); |
||||
|
|
||||
|
// execute kernel, read back the output and print to screen |
||||
|
clEnqueueTask(queue, kernel, 0, NULL, NULL); |
||||
|
clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, |
||||
|
32 * sizeof(char), hostBuffer, 0, NULL, NULL); |
||||
|
puts(hostBuffer); |
||||
|
|
||||
|
clFlush(queue); |
||||
|
clFinish(queue); |
||||
|
clReleaseKernel(kernel); |
||||
|
clReleaseProgram(program); |
||||
|
clReleaseMemObject(kernelBuffer); |
||||
|
clReleaseCommandQueue(queue); |
||||
|
clReleaseContext(context); |
||||
|
|
||||
|
return 0; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,83 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
#include <assert.h> |
||||
|
#ifdef __APPLE__ |
||||
|
#include <OpenCL/opencl.h> |
||||
|
#else |
||||
|
#include <CL/cl.h> |
||||
|
#endif |
||||
|
|
||||
|
#define KERNEL "part8.cl" |
||||
|
|
||||
|
/*
 * Prints one "CPU i,j,k" line for every index triple with 0 <= i < x,
 * 0 <= j < y and 0 <= k < z; k varies fastest and i slowest.
 */
void cpu_3d_loop (int x, int y, int z) {
    // an empty dimension means there is nothing to print
    if (x <= 0 || y <= 0 || z <= 0) {
        return;
    }

    int outer = 0;
    while (outer < x) {
        int middle = 0;
        while (middle < y) {
            int inner = 0;
            while (inner < z) {
                printf("CPU %d,%d,%d\n", outer, middle, inner);
                inner++;
            }
            middle++;
        }
        outer++;
    }
}
||||
|
|
||||
|
int main() { |
||||
|
|
||||
|
cl_platform_id platform; cl_device_id device; cl_context context; |
||||
|
cl_program program; cl_kernel kernel; cl_command_queue queue; |
||||
|
cl_mem kernelBuffer; |
||||
|
|
||||
|
FILE* programHandle; char *programBuffer; char *programLog; |
||||
|
size_t programSize; char hostBuffer[32]; |
||||
|
|
||||
|
int x = 4; |
||||
|
int y = 3; |
||||
|
int z = 2; |
||||
|
|
||||
|
cpu_3d_loop(x, y, z); |
||||
|
|
||||
|
// get first available sdk and gpu and create context |
||||
|
clGetPlatformIDs(1, &platform, NULL); |
||||
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
||||
|
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
||||
|
|
||||
|
// get size of kernel source |
||||
|
programHandle = fopen(KERNEL, "r"); |
||||
|
fseek(programHandle, 0, SEEK_END); |
||||
|
programSize = ftell(programHandle); |
||||
|
rewind(programHandle); |
||||
|
|
||||
|
// read kernel source into buffer |
||||
|
programBuffer = (char*) malloc(programSize + 1); |
||||
|
programBuffer[programSize] = '\0'; |
||||
|
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
||||
|
|
||||
|
fclose(programHandle); |
||||
|
|
||||
|
// create and build program |
||||
|
program = clCreateProgramWithSource(context, 1, |
||||
|
(const char**) &programBuffer, &programSize, NULL); |
||||
|
free(programBuffer); |
||||
|
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL); |
||||
|
|
||||
|
// create kernel and command queue |
||||
|
kernel = clCreateKernel(program, "ndrange_parallelism", NULL); |
||||
|
queue = clCreateCommandQueue(context, device, 0, NULL); |
||||
|
|
||||
|
size_t globalWorkSize[3] = {x, y, z}; |
||||
|
|
||||
|
// execute kernel, read back the output and print to screen |
||||
|
clEnqueueNDRangeKernel( |
||||
|
queue, kernel, 3, NULL, globalWorkSize, NULL, 0, NULL, NULL); |
||||
|
|
||||
|
clFlush(queue); |
||||
|
clFinish(queue); |
||||
|
clReleaseKernel(kernel); |
||||
|
clReleaseProgram(program); |
||||
|
clReleaseMemObject(kernelBuffer); |
||||
|
clReleaseCommandQueue(queue); |
||||
|
clReleaseContext(context); |
||||
|
|
||||
|
return 0; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
@ -0,0 +1,11 @@ |
|||||
|
// Each work-item prints its own global id in dimensions 0, 1 and 2 as
// "GPU i,j,k".
__kernel void ndrange_parallelism () {

    // ids are narrowed to int, matching the %d conversions below
    const int dim0 = (int) get_global_id(0);
    const int dim1 = (int) get_global_id(1);
    const int dim2 = (int) get_global_id(2);

    printf("GPU %d,%d,%d\n", dim0, dim1, dim2);

}
||||
|
|
||||
|
// vim: set ft=c ts=4 sw=4: |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue