0

This is my code code for odd even sort: This code compiling, and running okay but, not sorting I guess. Please help me I am currently using CUDA 11.3 on visual studio 2019. My idea is creating odd and even functions and run it on after another. I'm multi threading the process of each function. In other words if(arr[2k]>arr[2k+1]) swap(arr[2k],arr[2k+1]).

#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

__global__ void Even(int *arr, int n) {
    int index = threadIdx.x;
    index = index * 2;
    if  (index < n-1) {
        if (arr[index ] > arr[index + 1]) {
            int temp = arr[index];
            arr[index] = arr[index+ 1];
            arr[index + 1] = temp;
        }
    }
}

__global__ void Odd(int* arr, int n) {
    int index = threadIdx.x;
        index = index * 2+1;
    if (index <= n - 2) {
        if (arr[index ] > arr[index + 1]) {
            int temp = arr[index];
                arr[index] = arr[index + 1];
                arr[index+ 1] = temp;
        }
    }
}
        
#define n 10
int main(){
    int *a;
    int* ptr;
    const int Size = sizeof(int) * n;

    cudaMalloc((void**)&ptr, Size);

    a = (int*)malloc(n * Size);

    srand(time(NULL));
    
    for(int i =0 ;i<n;i++){
        a[i] = rand()%n;
    }


    for (int i = 0; i < n; i++) {
       std:: cout << a[i] << " ";
    }
    std::cout << endl;

    cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);

    auto starttime = high_resolution_clock::now();
 

    for (int i = 0; i < n / 2; i++) {
       Even<<<1,n >>>(a, n);
        Odd<<<1,n >>>(a, n);

    }

    cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);

    auto stoptime = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stoptime-starttime);

    std::cout<<" time : " <<duration.count()<<"ms"<<endl;

    for (int i = 0; i < n; i++) {
        std::cout << a[i] << " ";
    }
    std::cout << endl;
    free(a);
    cudaFree(ptr);
 
    return 0;

}
bmbigo
  • 3
  • 4
  • I might be overlooking something, but why are you setting `arr[0] = 0;` in the `Odd()` function? Isn't this overwriting the contents of the first value in the array? – Dillon Apr 17 '21 at 18:47
  • Aside from the issue mentioned in my previous comment, it looks like you're calling the kernels with `a` (the host pointer) instead of with `ptr` (the device pointer). – Dillon Apr 17 '21 at 18:59
  • 1
    when I run your code and change the kernel calls to use `ptr` instead of `a`, I get a sorted result. You may have a broken CUDA install. In addition to making the change from `a` to `ptr` on both kernel calls, I suggest adding [proper CUDA error checking](https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api). – Robert Crovella Apr 18 '21 at 00:00

1 Answers1

1

I suspect there are two problems.

First, you are overwriting the first value in the array every time you run Odd(). You should remove the line arr[0] = 0; to fix this problem.

Second, you are passing the host pointer a instead of the device pointer ptr to the kernels. You should pass ptr instead.

With these (untested) edits, the code looks like this:

#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

__global__ void Even(int *arr, int n) {
    int index = threadIdx.x;
    index = index * 2;
    if  (index < n-1) {
        if (arr[index ] > arr[index + 1]) {
            int temp = arr[index];
            arr[index] = arr[index+ 1];
            arr[index + 1] = temp;
        }
    }
}

__global__ void Odd(int* arr, int n) {
    int index = threadIdx.x;
    index = index * 2+1;
    // no longer setting a[0] = 0
    if (index <= n - 2) {
        if (arr[index ] > arr[index + 1]) {
            int temp = arr[index];
                arr[index] = arr[index + 1];
                arr[index+ 1] = temp;
        }
    }
}
        
#define n 10
int main(){
    int *a;
    int* ptr;
    const int Size = sizeof(int) * n;

    cudaMalloc((void**)&ptr, Size);

    a = (int*)malloc(n * Size);

    srand(time(NULL));
    
    for(int i =0 ;i<n;i++){
        a[i] = rand()%n;
    }


    for (int i = 0; i < n; i++) {
       std:: cout << a[i] << " ";
    }
    std::cout << endl;

    cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);

    auto starttime = high_resolution_clock::now();
 

    for (int i = 0; i < n / 2; i++) {
       Even<<<1,n >>>(ptr, n);  // ptr instead of a
        Odd<<<1,n >>>(ptr, n);  // ptr instead of a

    }

    cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);

    auto stoptime = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stoptime-starttime);

    std::cout<<" time : " <<duration.count()<<"ms"<<endl;

    for (int i = 0; i < n; i++) {
        std::cout << a[i] << " ";
    }
    std::cout << endl;
    free(a);
    cudaFree(ptr);
 
    return 0;

}
Dillon
  • 265
  • 2
  • 6
  • thank you for answering my question. I just tested as you told me but it's the same array is not sorted. currently, I am running my code on AWS windows maybe is it the problem? – bmbigo Apr 17 '21 at 19:14
  • Just to be sure, are you sure you recompiled the new code? I know I sometimes forget to do that myself. – Dillon Apr 17 '21 at 19:20
  • Yes, I recompiled the new code. I even tried it 5,6 times. Same unsorted array. – bmbigo Apr 17 '21 at 19:26
  • Thank you for double-checking, I just wanted to rule that out. One other potential problem I'm noticing is that you are calling `a = (int*)malloc(n * Size);`, but `Size` already accounts for `n`. What if you change this to `a = (int*)malloc(Size);`? – Dillon Apr 17 '21 at 19:29
  • It is still the same printing unsorted array. Thank you so much for checking my code and sharing your opinion with me – bmbigo Apr 17 '21 at 19:34
  • That's strange. I can take a look at this in more depth later today if no one else solves your problem before then. In the meantime, you might try running `Even()` once by itself and looking at the result after that. You can also use `printf()` statements inside kernel functions to help debug. – Dillon Apr 17 '21 at 19:38