commit
750a8ce92e
11 changed files with 521 additions and 0 deletions
-
16Makefile
-
4README.md
-
60part1.c
-
78part2.c
-
49part3.c
-
57part4.c
-
20part4.cl
-
71part5.c
-
72part6.c
-
83part8.c
-
11part8.cl
@ -0,0 +1,16 @@ |
|||
# Compiler and linker settings; += lets the environment or the command line
# contribute additional flags.
CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include
LIBS += -lcl -L/usr/local/lib64/beignet
CC = cc

BINARIES = part1 part2 part3 part4 part5 part6 part8

# Neither target names a real file, so never let a stray file called
# "all" or "clean" suppress them.
.PHONY: all clean

all: $(BINARIES)

# Libraries come after the source file: linkers that resolve symbols left to
# right (or that default to --as-needed) drop a library named before the
# object that uses it.
%: %.c
	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
	strip $@

# -f keeps "make clean" from failing when some binaries were never built.
clean:
	rm -f $(BINARIES)
|||
@ -0,0 +1,4 @@ |
|||
OpenCL tutorial notes |
|||
===================== |
|||
|
|||
URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ |
|||
@ -0,0 +1,60 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
int main() { |
|||
|
|||
int i, j; |
|||
char* info; |
|||
size_t infoSize; |
|||
|
|||
cl_uint platformCount; |
|||
cl_platform_id *platforms; |
|||
|
|||
const char* attributeNames[5] = { |
|||
"Name", "Vendor", "Version", "Profile", "Extensions" }; |
|||
|
|||
const cl_platform_info attributeTypes[5] = { |
|||
CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION, |
|||
CL_PLATFORM_PROFILE, CL_PLATFORM_EXTENSIONS }; |
|||
|
|||
const int attributeCount = sizeof(attributeNames) / sizeof(char*); |
|||
|
|||
// get platform count |
|||
clGetPlatformIDs(5, NULL, &platformCount); |
|||
// get all platforms |
|||
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
|||
clGetPlatformIDs(platformCount, platforms, NULL); |
|||
|
|||
// for each platform print all attributes |
|||
for (i=0; i<platformCount; i++) { |
|||
|
|||
printf("\n %d. Platform \n", i+1); |
|||
|
|||
for (j=0; j<attributeCount; j++) { |
|||
|
|||
// get platform attribute value size |
|||
clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize); |
|||
info = (char*) malloc(infoSize); |
|||
|
|||
// get platform attribute value |
|||
clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info, NULL); |
|||
|
|||
printf(" %d.%d %-11s: %s\n", i+1, j+1, attributeNames[j], info); |
|||
free(info); |
|||
|
|||
} |
|||
|
|||
printf("\n"); |
|||
|
|||
} |
|||
|
|||
free(platforms); |
|||
return 0; |
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,78 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
int main() { |
|||
|
|||
int i, j; |
|||
char* value; |
|||
size_t valueSize; |
|||
cl_uint platformCount; |
|||
cl_platform_id* platforms; |
|||
cl_uint deviceCount; |
|||
cl_device_id* devices; |
|||
cl_uint maxComputeUnits; |
|||
|
|||
// get all platforms |
|||
clGetPlatformIDs(0, NULL, &platformCount); |
|||
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
|||
clGetPlatformIDs(platformCount, platforms, NULL); |
|||
|
|||
for (i = 0; i < platformCount; i++) { |
|||
|
|||
// get all devices |
|||
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount); |
|||
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount); |
|||
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL); |
|||
|
|||
// for each device print critical attributes |
|||
for (j = 0; j < deviceCount; j++) { |
|||
|
|||
// print device name |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize); |
|||
value = (char*) malloc(valueSize); |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL); |
|||
printf("%d. Device: %s\n", j+1, value); |
|||
free(value); |
|||
|
|||
// print hardware device version |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize); |
|||
value = (char*) malloc(valueSize); |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL); |
|||
printf(" %d.%d Hardware version: %s\n", j+1, 1, value); |
|||
free(value); |
|||
|
|||
// print software driver version |
|||
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize); |
|||
value = (char*) malloc(valueSize); |
|||
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL); |
|||
printf(" %d.%d Software version: %s\n", j+1, 2, value); |
|||
free(value); |
|||
|
|||
// print c version supported by compiler for device |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize); |
|||
value = (char*) malloc(valueSize); |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL); |
|||
printf(" %d.%d OpenCL C version: %s\n", j+1, 3, value); |
|||
free(value); |
|||
|
|||
// print parallel compute units |
|||
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, |
|||
sizeof(maxComputeUnits), &maxComputeUnits, NULL); |
|||
printf(" %d.%d Parallel compute units: %d\n", j+1, 4, maxComputeUnits); |
|||
|
|||
} |
|||
|
|||
free(devices); |
|||
|
|||
} |
|||
|
|||
free(platforms); |
|||
return 0; |
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,49 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; |
|||
cl_device_id device; |
|||
cl_context context; |
|||
cl_uint refCount; |
|||
|
|||
// get first available platform |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
|
|||
// get first available gpu device |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
|
|||
// create context |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get context reference count |
|||
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
|||
sizeof(refCount), &refCount, NULL); |
|||
printf("Ref count: %u ", refCount); |
|||
|
|||
// increment reference count |
|||
clRetainContext(context); |
|||
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
|||
sizeof(refCount), &refCount, NULL); |
|||
printf(">> %u ", refCount); |
|||
|
|||
// decrement reference count |
|||
clReleaseContext(context); |
|||
clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, |
|||
sizeof(refCount), &refCount, NULL); |
|||
printf(">> %u ", refCount); |
|||
|
|||
// finally release context |
|||
clReleaseContext(context); |
|||
printf(">> 0\n"); |
|||
return 0; |
|||
|
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,57 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part4.cl" |
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; |
|||
cl_device_id device; |
|||
cl_context context; |
|||
cl_program program; |
|||
|
|||
FILE* programHandle; |
|||
size_t programSize, kernelSourceSize; |
|||
char *programBuffer, *kernelSource; |
|||
|
|||
// get first available platform and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get size of kernel source |
|||
programHandle = fopen(KERNEL, "r"); |
|||
fseek(programHandle, 0, SEEK_END); |
|||
programSize = ftell(programHandle); |
|||
rewind(programHandle); |
|||
|
|||
// read kernel source into buffer |
|||
programBuffer = (char*) malloc(programSize + 1); |
|||
programBuffer[programSize] = '\0'; |
|||
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
|||
fclose(programHandle); |
|||
|
|||
// create program from buffer |
|||
program = clCreateProgramWithSource(context, 1, |
|||
(const char**) &programBuffer, &programSize, NULL); |
|||
free(programBuffer); |
|||
|
|||
// read kernel source back in from program to check |
|||
clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &kernelSourceSize); |
|||
kernelSource = (char*) malloc(kernelSourceSize); |
|||
clGetProgramInfo(program, CL_PROGRAM_SOURCE, kernelSourceSize, kernelSource, NULL); |
|||
printf("\nKernel source:\n\n%s\n", kernelSource); |
|||
free(kernelSource); |
|||
|
|||
clReleaseContext(context); |
|||
return 0; |
|||
|
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,20 @@ |
|||
__kernel void hello(__global char* string) { |
|||
|
|||
string[0] = 'H'; |
|||
string[1] = 'e'; |
|||
string[2] = 'l'; |
|||
string[3] = 'l'; |
|||
string[4] = 'o'; |
|||
string[5] = ','; |
|||
string[6] = ' '; |
|||
string[7] = 'W'; |
|||
string[8] = 'o'; |
|||
string[9] = 'r'; |
|||
string[10] = 'l'; |
|||
string[11] = 'd'; |
|||
string[12] = '!'; |
|||
string[13] = 0; |
|||
|
|||
} |
|||
|
|||
// vim: ft=c ts=4 sw=4: |
|||
@ -0,0 +1,71 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part4.cl" |
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; cl_device_id device; cl_context context; |
|||
cl_program program; cl_int error; cl_build_status status; |
|||
|
|||
FILE* programHandle; |
|||
char *programBuffer; char *programLog; |
|||
size_t programSize; size_t logSize; |
|||
|
|||
// get first available platform and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get size of kernel source |
|||
programHandle = fopen(KERNEL, "r"); |
|||
fseek(programHandle, 0, SEEK_END); |
|||
programSize = ftell(programHandle); |
|||
rewind(programHandle); |
|||
|
|||
// read kernel source into buffer |
|||
programBuffer = (char*) malloc(programSize + 1); |
|||
programBuffer[programSize] = '\0'; |
|||
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
|||
|
|||
fclose(programHandle); |
|||
|
|||
// create program from buffer |
|||
program = clCreateProgramWithSource(context, 1, |
|||
(const char**) &programBuffer, &programSize, NULL); |
|||
free(programBuffer); |
|||
|
|||
// build program |
|||
const char options[] = "-Werror -cl-std=CL1.1"; |
|||
error = clBuildProgram(program, 1, &device, options, NULL, NULL); |
|||
|
|||
// build failed |
|||
if (error != CL_SUCCESS) { |
|||
|
|||
// check build error and build status first |
|||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, |
|||
sizeof(cl_build_status), &status, NULL); |
|||
|
|||
// check build log |
|||
clGetProgramBuildInfo(program, device, |
|||
CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); |
|||
programLog = (char*) calloc (logSize+1, sizeof(char)); |
|||
clGetProgramBuildInfo(program, device, |
|||
CL_PROGRAM_BUILD_LOG, logSize+1, programLog, NULL); |
|||
printf("Build failed; error=%d, status=%d, programLog:\n\n%s", |
|||
error, status, programLog); |
|||
free(programLog); |
|||
|
|||
} |
|||
|
|||
clReleaseContext(context); |
|||
return 0; |
|||
} |
|||
|
|||
// vim: set ts=4 sw=4: |
|||
@ -0,0 +1,72 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part4.cl" |
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; cl_device_id device; cl_context context; |
|||
cl_program program; cl_kernel kernel; cl_command_queue queue; |
|||
cl_mem kernelBuffer; |
|||
|
|||
FILE* programHandle; char *programBuffer; char *programLog; |
|||
size_t programSize; char hostBuffer[32]; |
|||
|
|||
// get first available sdk and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get size of kernel source |
|||
programHandle = fopen(KERNEL, "r"); |
|||
fseek(programHandle, 0, SEEK_END); |
|||
programSize = ftell(programHandle); |
|||
rewind(programHandle); |
|||
|
|||
// read kernel source into buffer |
|||
programBuffer = (char*) malloc(programSize + 1); |
|||
programBuffer[programSize] = '\0'; |
|||
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
|||
|
|||
fclose(programHandle); |
|||
|
|||
// create and build program |
|||
program = clCreateProgramWithSource(context, 1, |
|||
(const char**) &programBuffer, &programSize, NULL); |
|||
free(programBuffer); |
|||
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL); |
|||
|
|||
// create kernel and command queue |
|||
kernel = clCreateKernel(program, "hello", NULL); |
|||
queue = clCreateCommandQueue(context, device, 0, NULL); |
|||
|
|||
// create kernel argument buffer and set it into kernel |
|||
kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
|||
32 * sizeof(char), NULL, NULL); |
|||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); |
|||
|
|||
// execute kernel, read back the output and print to screen |
|||
clEnqueueTask(queue, kernel, 0, NULL, NULL); |
|||
clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, |
|||
32 * sizeof(char), hostBuffer, 0, NULL, NULL); |
|||
puts(hostBuffer); |
|||
|
|||
clFlush(queue); |
|||
clFinish(queue); |
|||
clReleaseKernel(kernel); |
|||
clReleaseProgram(program); |
|||
clReleaseMemObject(kernelBuffer); |
|||
clReleaseCommandQueue(queue); |
|||
clReleaseContext(context); |
|||
|
|||
return 0; |
|||
|
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,83 @@ |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <assert.h> |
|||
#ifdef __APPLE__ |
|||
#include <OpenCL/opencl.h> |
|||
#else |
|||
#include <CL/cl.h> |
|||
#endif |
|||
|
|||
#define KERNEL "part8.cl" |
|||
|
|||
/*
 * Prints one "CPU i,j,k" line for every index triple of an x-by-y-by-z
 * grid, with the last index varying fastest. Nothing is printed when any
 * extent is zero or negative.
 */
void cpu_3d_loop (int x, int y, int z) {
    int outer = 0;
    while (outer < x) {
        int middle = 0;
        while (middle < y) {
            int inner = 0;
            while (inner < z) {
                printf("CPU %d,%d,%d\n", outer, middle, inner);
                ++inner;
            }
            ++middle;
        }
        ++outer;
    }
}
|||
|
|||
int main() { |
|||
|
|||
cl_platform_id platform; cl_device_id device; cl_context context; |
|||
cl_program program; cl_kernel kernel; cl_command_queue queue; |
|||
cl_mem kernelBuffer; |
|||
|
|||
FILE* programHandle; char *programBuffer; char *programLog; |
|||
size_t programSize; char hostBuffer[32]; |
|||
|
|||
int x = 4; |
|||
int y = 3; |
|||
int z = 2; |
|||
|
|||
cpu_3d_loop(x, y, z); |
|||
|
|||
// get first available sdk and gpu and create context |
|||
clGetPlatformIDs(1, &platform, NULL); |
|||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); |
|||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); |
|||
|
|||
// get size of kernel source |
|||
programHandle = fopen(KERNEL, "r"); |
|||
fseek(programHandle, 0, SEEK_END); |
|||
programSize = ftell(programHandle); |
|||
rewind(programHandle); |
|||
|
|||
// read kernel source into buffer |
|||
programBuffer = (char*) malloc(programSize + 1); |
|||
programBuffer[programSize] = '\0'; |
|||
assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); |
|||
|
|||
fclose(programHandle); |
|||
|
|||
// create and build program |
|||
program = clCreateProgramWithSource(context, 1, |
|||
(const char**) &programBuffer, &programSize, NULL); |
|||
free(programBuffer); |
|||
clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.1", NULL, NULL); |
|||
|
|||
// create kernel and command queue |
|||
kernel = clCreateKernel(program, "ndrange_parallelism", NULL); |
|||
queue = clCreateCommandQueue(context, device, 0, NULL); |
|||
|
|||
size_t globalWorkSize[3] = {x, y, z}; |
|||
|
|||
// execute kernel, read back the output and print to screen |
|||
clEnqueueNDRangeKernel( |
|||
queue, kernel, 3, NULL, globalWorkSize, NULL, 0, NULL, NULL); |
|||
|
|||
clFlush(queue); |
|||
clFinish(queue); |
|||
clReleaseKernel(kernel); |
|||
clReleaseProgram(program); |
|||
clReleaseMemObject(kernelBuffer); |
|||
clReleaseCommandQueue(queue); |
|||
clReleaseContext(context); |
|||
|
|||
return 0; |
|||
|
|||
} |
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
@ -0,0 +1,11 @@ |
|||
// Each work-item prints its own global index along the three NDRange
// dimensions.
__kernel void ndrange_parallelism () {

    // indices are narrowed to int to match the %d conversions below
    const int gid[3] = {
        (int) get_global_id(0),
        (int) get_global_id(1),
        (int) get_global_id(2)
    };

    printf("GPU %d,%d,%d\n", gid[0], gid[1], gid[2]);

}
|||
|
|||
// vim: set ft=c ts=4 sw=4: |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue