for(int i=0;i<16384;i++)
array[i]=expensive_function(array[i]);
can be partitioned across all OpenCL-capable devices:
// use all GPUs and CPUs at the same time
var numberCruncher = new ClNumberCruncher(AcceleratorType.GPU|AcceleratorType.CPU,
@"__kernel void acceleratedLoop(__global float *a)
{
int threadId=get_global_id(0);
a[threadId]=pow(tanh(sqrt(cos(sin(a[threadId])))),0.3f);
}");
ClArray<float> buffer = array;
buffer.compute(numberCruncher,1,"acceleratedLoop",16384);
// the array now holds values computed by 16384 work-items spread across different devices,
// such as GPUs, CPUs, iGPUs and FPGAs
You can view a quick tutorial and download binaries (for lazy developers) here: https://www.codeproject.com/Articles/1181213/Easy-OpenCL-Multiple-Device-Load-Balancing-and-Pip
If you want to build the source yourself or read the detailed wiki, see:
https://github.com/tugrul512bit/Cekirdekler/wiki