diff --git a/.gitignore b/.gitignore
index f100b80..3612461 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,10 @@
-part*
+part1
+part2
+part3
+part4
+part4.clbin
+part5
+part6
+part8
+createclbin
+part6bin
diff --git a/Makefile b/Makefile
index ec1d749..ff7e653 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,18 @@
-CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include
-LIBS += -lcl -L/usr/local/lib64/beignet
+CFLAGS += -O0 -Werror -ggdb -std=c99
+LIBS += -lOpenCL
 CC = cc
 
-BINARIES = part1 part2 part3 part4 part5 part6 part8
+BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin
 
 all: $(BINARIES)
 
 %: %.c
 	$(CC) $(CFLAGS) $(LIBS) -o $@ $<
-	strip $@
+
+part4.clbin: createclbin part4.cl # createclbin reads part4.cl and writes part4.clbin
+	./createclbin
 
 .PHONY: clean
 
 clean:
-	rm $(BINARIES)
+	@rm -f $(BINARIES) part4.clbin
diff --git a/README.md b/README.md
index b92eb9a..5693aea 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,26 @@
-OpenCL tutorial notes
-=====================
+# OpenCL cookbook code
 
-URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/
+These are some code examples from the
+[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/).
+
+## Description
+
+Basically I just took the examples from that tutorial and put them into a
+repo.
+
+## Requirements
+
+Some OpenCL-capable hardware and the corresponding OpenCL library exposing the
+OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT
+Integrated Graphics Controller (rev 09)) with the
+[beignet](https://www.freedesktop.org/wiki/Software/Beignet/)
+open source library.
+
+## License
+
+As far as I can tell, the code is free of any license. Its purpose is just to
+demonstrate OpenCL.
+
+## Author
+
+Dhruba Bandopadhyay
diff --git a/createclbin.c b/createclbin.c
new file mode 100644
index 0000000..3729f36
--- /dev/null
+++ b/createclbin.c
@@ -0,0 +1,210 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#define KERNEL "part4.cl"
+
+/*
+ * Write the compiled binary for the program's first device to a file.
+ * Binaries are fetched for every device of the program, but only the one at
+ * index 0 is written.
+ *
+ * filename: path of the file to create or truncate.
+ * program:  OpenCL program whose binaries are queried.
+ *
+ * Returns the number of bytes written, or 0 on any failure (OpenCL query
+ * error, no device or empty binary, allocation failure, file I/O error).
+ */
+size_t
+clPutProgramBinaryToFile(
+		const char * const       filename,
+		const cl_program * const program)
+{
+	cl_int    cl_status;
+	cl_uint   num_devices = 0;
+	cl_uint   allocated = 0;
+	size_t    size = 0;
+	FILE    * handle;
+
+	cl_status = clGetProgramInfo(*program, CL_PROGRAM_NUM_DEVICES,
+			sizeof(cl_uint), &num_devices, NULL);
+
+	// a zero-length VLA is undefined behaviour, so reject "no devices" here
+	if (cl_status != CL_SUCCESS || num_devices == 0) {
+		return 0;
+	}
+
+	size_t binary_size[num_devices];
+	cl_status = clGetProgramInfo(*program, CL_PROGRAM_BINARY_SIZES,
+			sizeof(size_t) * num_devices, binary_size, NULL);
+
+	if (cl_status != CL_SUCCESS) {
+		return 0;
+	}
+
+	// CL_PROGRAM_BINARIES wants one caller-allocated buffer per device
+	unsigned char * binaries[num_devices];
+	for (; allocated < num_devices; allocated++) {
+		binaries[allocated] = malloc(binary_size[allocated]);
+		// malloc(0) may return NULL, so NULL only counts as failure if size > 0
+		if (binaries[allocated] == NULL && binary_size[allocated] != 0) {
+			goto cleanup;
+		}
+	}
+
+	cl_status = clGetProgramInfo(*program, CL_PROGRAM_BINARIES,
+			sizeof(unsigned char *) * num_devices, binaries, NULL);
+
+	if (cl_status != CL_SUCCESS || binary_size[0] == 0) {
+		goto cleanup;
+	}
+
+	handle = fopen(filename, "wb");
+	if (handle == NULL) {
+		goto cleanup;
+	}
+
+	size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle);
+
+	// a short write or failed flush leaves a truncated file: report failure
+	if (fclose(handle) != 0 || size != binary_size[0]) {
+		size = 0;
+	}
+
+cleanup:
+	for (cl_uint i = 0; i < allocated; i++) {
+		free(binaries[i]);
+	}
+
+	return size;
+}
+
+/*
+ * Read OpenCL C source from filename, create a program in *context and
+ * build it for the first device of that context.
+ *
+ * Returns the source size in bytes and stores the built program in
+ * *program; returns 0 on any failure (including an empty source file), in
+ * which case a program that was already created has been released again.
+ */
+size_t
+clGetProgramFromSourceFile(
+		const char * const       filename,
+		const cl_context * const context,
+		cl_program * const       program)
+{
+	FILE    * handle;
+	char    * buffer;
+	long      length;
+	size_t    size;
+	cl_int    cl_status;
+	cl_uint   num_devices = 0;
+
+	handle = fopen(filename, "r");
+	if (handle == NULL) {
+		return 0;
+	}
+
+	// get size of kernel source; ftell reports errors as -1
+	if (fseek(handle, 0, SEEK_END) != 0 || (length = ftell(handle)) <= 0) {
+		fclose(handle);
+		return 0;
+	}
+	size = (size_t) length;
+	rewind(handle);
+
+	// read kernel source into a NUL-terminated buffer
+	buffer = malloc(size + 1);
+	if (buffer == NULL || size != fread(buffer, sizeof(char), size, handle)) {
+		fclose(handle);
+		free(buffer);
+		return 0;
+	}
+	buffer[size] = '\0';
+	fclose(handle);
+
+	// create and build program
+	*program = clCreateProgramWithSource(
+			*context, 1, (const char**) &buffer, &size, &cl_status);
+	free(buffer);
+
+	if (cl_status != CL_SUCCESS) {
+		return 0;
+	}
+
+	cl_status = clGetContextInfo(*context, CL_CONTEXT_NUM_DEVICES,
+			sizeof(cl_uint), &num_devices, NULL);
+	// zero devices would make the VLA below zero-length (undefined behaviour)
+	if (cl_status != CL_SUCCESS || num_devices == 0) {
+		clReleaseProgram(*program);
+		return 0;
+	}
+
+	cl_device_id devices[num_devices];
+	cl_status = clGetContextInfo(*context, CL_CONTEXT_DEVICES,
+			sizeof(cl_device_id) * num_devices, devices, NULL);
+	// only build when the device list was actually retrieved
+	if (cl_status == CL_SUCCESS) {
+		cl_status = clBuildProgram(
+				*program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL);
+	}
+	if (cl_status != CL_SUCCESS) {
+		clReleaseProgram(*program);
+		return 0;
+	}
+
+	return size;
+}
+
+// Create a context over all GPU devices of the first platform.
+// Returns 0 on success; on failure returns -1 and sets *context to NULL.
+int
+clInit(cl_context * const context)
+{
+	cl_platform_id platform;
+	cl_uint        num_devices = 0;
+
+	*context = NULL;
+	if (clGetPlatformIDs(1, &platform, NULL) != CL_SUCCESS
+			|| clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices) != CL_SUCCESS
+			|| num_devices == 0) {
+		return -1;
+	}
+	printf("%u devices during init.\n", num_devices);
+	cl_device_id devices[num_devices];
+	if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL) == CL_SUCCESS) {
+		*context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL);
+	}
+	return (*context != NULL) ? 0 : -1;
+}
+
+// Build KERNEL from source and store its device binary as KERNEL "bin".
+int main(void)
+{
+	cl_context context;
+	cl_program program;
+	size_t     count = 0;
+
+	if (clInit(&context) != 0) {
+		fprintf(stderr, "createclbin: OpenCL initialisation failed\n");
+		return EXIT_FAILURE;
+	}
+	if (clGetProgramFromSourceFile(KERNEL, &context, &program) != 0) {
+		count = clPutProgramBinaryToFile(KERNEL "bin", &program);
+		clReleaseProgram(program);
+	}
+	clReleaseContext(context);
+	if (count == 0) {
+		fprintf(stderr, "createclbin: could not create " KERNEL "bin\n");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+// vim: set ft=c ts=4 sw=4:
diff --git a/part6bin.c b/part6bin.c
new file mode 100644
index 0000000..0b19604
--- /dev/null
+++ b/part6bin.c
@@ -0,0 +1,137 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#define KERNEL "part4.clbin"
+
+/*
+ * Load the program binary KERNEL, build it for the first GPU device, run
+ * its "hello" kernel once and print the 32-byte buffer the kernel fills.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if the binary cannot be read or an
+ * OpenCL call reports an error.
+ */
+int main() {
+
+	cl_platform_id platform; cl_device_id device; cl_context context = NULL;
+	cl_program program = NULL; cl_kernel kernel = NULL;
+	cl_command_queue queue = NULL; cl_mem kernelBuffer = NULL;
+	cl_int status = CL_SUCCESS; int exitCode = EXIT_FAILURE;
+
+	FILE* programHandle; char *programBuffer = NULL;
+	long fileSize; size_t programSize; char hostBuffer[32];
+
+	// get size of program binary
+	programHandle = fopen(KERNEL, "rb");
+	if (programHandle == NULL) {
+		perror(KERNEL);
+		return EXIT_FAILURE;
+	}
+	fseek(programHandle, 0, SEEK_END);
+	fileSize = ftell(programHandle);
+	rewind(programHandle);
+	// ftell reports errors as -1; treat that like an empty file
+	programSize = (fileSize > 0) ? (size_t) fileSize : 0;
+
+	// read program binary into buffer; the fread is kept outside assert() so
+	// it is still executed when NDEBUG is defined
+	programBuffer = malloc(programSize + 1);
+	if (programBuffer == NULL || programSize == 0 ||
+			programSize != fread(programBuffer, sizeof(char), programSize, programHandle)) {
+		fprintf(stderr, "cannot read %s\n", KERNEL);
+		fclose(programHandle);
+		goto cleanup;
+	}
+	programBuffer[programSize] = '\0';
+	fclose(programHandle);
+
+	// get first available sdk and gpu and create context
+	status = clGetPlatformIDs(1, &platform, NULL);
+	if (status == CL_SUCCESS) {
+		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
+	}
+	if (status == CL_SUCCESS) {
+		context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
+	}
+	if (status != CL_SUCCESS) {
+		goto cleanup;
+	}
+
+	// create program from the binary and build it; clCreateKernel needs a
+	// successfully built program
+	program = clCreateProgramWithBinary(context, 1, &device, &programSize,
+			(const unsigned char **) &programBuffer, NULL, &status);
+	if (status == CL_SUCCESS) {
+		status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
+	}
+	if (status != CL_SUCCESS) {
+		goto cleanup;
+	}
+
+	// create kernel and command queue
+	kernel = clCreateKernel(program, "hello", &status);
+	if (status == CL_SUCCESS) {
+		queue = clCreateCommandQueue(context, device, 0, &status);
+	}
+	if (status != CL_SUCCESS) {
+		goto cleanup;
+	}
+
+	// create kernel argument buffer and set it into kernel
+	kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+			sizeof hostBuffer, NULL, &status);
+	if (status == CL_SUCCESS) {
+		status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer);
+	}
+
+	// execute kernel and read back the output with a blocking read
+	if (status == CL_SUCCESS) {
+		status = clEnqueueTask(queue, kernel, 0, NULL, NULL);
+	}
+	if (status == CL_SUCCESS) {
+		status = clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0,
+				sizeof hostBuffer, hostBuffer, 0, NULL, NULL);
+	}
+	if (status != CL_SUCCESS) {
+		goto cleanup;
+	}
+
+	// puts() needs a terminated string, whatever the kernel wrote
+	hostBuffer[sizeof hostBuffer - 1] = '\0';
+	puts(hostBuffer);
+	exitCode = EXIT_SUCCESS;
+
+cleanup:
+	if (status != CL_SUCCESS) {
+		fprintf(stderr, "OpenCL call failed with error %d\n", (int) status);
+	}
+	free(programBuffer);
+
+	// release only the objects that were actually created
+	if (queue != NULL) {
+		clFinish(queue);
+		clReleaseCommandQueue(queue);
+	}
+	if (kernel != NULL) {
+		clReleaseKernel(kernel);
+	}
+	if (program != NULL) {
+		clReleaseProgram(program);
+	}
+	if (kernelBuffer != NULL) {
+		clReleaseMemObject(kernelBuffer);
+	}
+	if (context != NULL) {
+		clReleaseContext(context);
+	}
+
+	return exitCode;
+
+}
+
+// vim: set ft=c ts=4 sw=4: