0

This open CL code compiles okay but on execution keeps returning 0.0f as the value of 'call[0]'

typedef struct tag_sinterest {
    __global float *h_phi;
    __global float *l_phi;
} sinterest;

typedef struct tag_sfutures {
    int time;
    __global float put;
    __global sinterest *call;
} sfutures;

float square(float _number) {
    return(_number*_number);
}

__kernel void Futures(__global sfutures *_futures,
    int _futures_index, __global float *_call, __global float *_call_weight) {
    int i = get_global_id(0);
    int j = get_global_id(1);
    int k = get_global_id(2);

    float _weight = 0.0f;

    _weight += (float)square(_futures[_futures_index-1].call[j].h_phi[k]-_futures[i].call[j].h_phi[k]);
    _weight += (float)square(_futures[_futures_index-1].call[j].l_phi[k]-_futures[i].call[j].l_phi[k]);

    _call[0] += _weight*_futures[k].put;
    _call_weight[0] += _weight;
}

It is meant to replicate these for loops which work fine on the host (C#):

void Futures(Sfutures[] Futures,int FuturesIndex,ref float Call,ref float CallWeight) {
    for(int f=0;f<_FUTURES;f++) {
        for(int I=0;I<_INTEREST;I++) {
            float _weight = 0.0f;

            _weight += (float)pow(Futures[FuturesIndex-1].call[f].h_phi[I]-Futures[i].call[f].h_phi[I],2.0f);
            _weight += (float)pow(Futures[FuturesIndex-1].call[f].l_phi[I]-Futures[i].call[f].l_phi[I],2.0f);

            Call += _weight*Futures[i].put;
            CallWeight += _weight;
        }
    }
}

Why is this? I have an intel HD 4000, OpenCL 1.2 on windows 10 64-bit

user703016
  • 37,307
  • 8
  • 87
  • 112
ssn
  • 439
  • 5
  • 14
  • Are you using C or C++? – fluter May 04 '16 at 05:17
  • don't you think that it could be a race condition in this lines `_call[0] += _weight*_futures[k].put; _call_weight[0] += _weight;` ? – segevara May 04 '16 at 05:31
  • @segevara how can I call them sequentially? – ssn May 04 '16 at 05:32
  • I mean that many threads is trying to add value in the same cell of array simultaneously – segevara May 04 '16 at 05:35
  • you can use [atomic operation](https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/atomicFunctions.html) of opencl or [barriers](http://stackoverflow.com/questions/6890302/barriers-in-opencl) – segevara May 04 '16 at 05:38
  • @fluter On the host I am using mql5, just provided a translation to show what I want to replicate in the kernel – ssn May 04 '16 at 05:39
  • @segevara tried local and global barriers but the result is the same 0.0f – ssn May 04 '16 at 06:01
  • Structure of pointers, reduction using just += ..... Just delete all the code and start over. You need first to understand CL and how it works, before attempting to do one of the most difficult operations that can be done in CL. – DarkZeros May 04 '16 at 08:25
  • 1
    Ah, and you are very lucky it returns 0, and it does not crash your PC, after all those out of bounds memory accesses. Probably cl calls are returning error codes and not actually updating the output buffer value. – DarkZeros May 04 '16 at 08:37

0 Answers0