0

How should I optimize my code?

Given two sorted arrays arr1[] of size N and arr2[] of size M. Each array is sorted in non-decreasing order. Merge the two arrays into one sorted array in non-decreasing order without using any extra space.

Example 1:

Input:

N = 4, M = 5
arr1[] = {1, 3, 5, 7}
arr2[] = {0, 2, 6, 8, 9}

Output:

0 1 2 3 5 6 7 8 9

Explanation: Since you can't use any extra space, modify the given arrays to form:

arr1[] = {0, 1, 2, 3}
arr2[] = {5, 6, 7, 8, 9}
class Solution {
    public void merge(int arr1[], int arr2[], int n, int m) {
        for (int k = 0; k < n; k++) {
            boolean c = false;
            if (arr1[k] > arr2[0]) {
                int temp = arr1[k];
                arr1[k] = arr2[0];
                arr2[0] = temp;
                c = true;
            }
            if (c) {
                int minIndex = 0;
                for (int i = 0; i < m; i++) {
                    if (arr2[i] < arr2[minIndex])
                        minIndex = i;
                }
                int t = arr2[minIndex];
                arr2[minIndex] = arr2[0];
                arr2[0] = t;
            }
        }
        for (int i = 0; i < m - 1; i++) {
            int minIndex = i;
            for (int j = i; j < m; j++) {
                if (arr2[j] < arr2[minIndex]) {
                    minIndex = j;
                }
            }
            int temp = arr2[i];
            arr2[i] = arr2[minIndex];
            arr2[minIndex] = temp;
        }
    }
}

2 Answers2

0

Here is a slightly simpler version. It still has the same quadratic time complexity of O(n*m) but executes 10 times faster than the posted code, so it does qualify as an optimized version:

class Solution {
    public void merge_chqrlie(int arr1[], int arr2[], int n, int m) {
        if (n > 0 && m > 0) {
            // for each element of arr2
            for (int i1 = 0, i2 = 0, j, k = 0; k < m; k++) {
                // shift it left inside arr2
                for (j = k; j > i2 && arr2[j-1] > arr2[j]; j--) {
                    int temp = arr2[j-1];
                    arr2[j-1] = arr2[j];
                    arr2[j] = temp;
                }
                // if it moved to arr2[0] and is smaller than the last element of arr1
                if (j == 0 && arr1[n-1] > arr2[0]) {
                    // move it to arr1[n-1]
                    int temp = arr1[n-1];
                    arr1[n-1] = arr2[0];
                    arr2[0] = temp;
                    // and further shift it into arr1
                    for (j = n - 1; j > i1 && arr1[j-1] > arr1[j]; j--) {
                        temp = arr1[j-1];
                        arr1[j-1] = arr1[j];
                        arr1[j] = temp;
                    }
                    // update i1: the finalized portion of arr1
                    i1 = j + 1;
                } else {
                    // update i2: the finalized portion of arr2
                    i2 = j + 1;
                }
            }
        }
    }
}

Merging without using extra space is a very strict constraint: do you consider local variables to be extra space? If you allow for limited extra space, such as a single array of 128 elements, much faster solutions can be found that perform 500 to 2000 times faster than the posted code for moderately large arrays (above 5000 elements) but still exhibit quadratic time complexity for large arrays.

Here is an advanced solution (1000x faster for arrays of 500k elements):

class Solution {
    static int tmp_size = 128;
    static int *tmp;
    public void merge_chqrlie2(int arr1[], int arr2[], int n, int m) {
        if (!tmp)
            tmp = new int[tmp_size];
        for (int chunk = tmp_size; n > 0; n -= chunk, arr1 += chunk) {
            int i = 0, j = 0, k = 0;
            if (chunk > n)
                chunk = n;
            for (k = 0; k < chunk; k++)
                tmp[k] = arr1[k];
    
            for (k = 0; k < chunk; k++) {
                if (j >= m || tmp[i] <= arr2[j])
                    arr1[k] = tmp[i++];
                else
                    arr1[k] = arr2[j++];
            }
            for (k = 0; i < chunk; k++) {
                if (j >= m || tmp[i] <= arr2[j])
                    arr2[k] = tmp[i++];
                else
                    arr2[k] = arr2[j++];
            }
        }
    }
}

The temporary storage would be allocated on the stack (automatic storage) in languages such as C or C++ for optimum efficiency.

Here is an even more efficient one, with better performance on pathological samples and 2,5x faster on random contents:

void merge_chqrlie3(int arr1[], int arr2[], size_t n, size_t m) {
    int tmp[128];
    size_t i2 = 0;

    for (size_t chunk = sizeof(tmp) / sizeof(*tmp); n > 0; n -= chunk, arr1 += chunk) {
        size_t i = 0, j = 0, k = 0;

        if (i2 == 0) {
            while (n > 0 && arr1[0] <= arr2[0]) {
                arr1++;
                n--;
            }
        }
        if (chunk > n)
            chunk = n;

        for (k = 0; k < chunk; k++)
            tmp[k] = arr1[k];

        if (chunk <= i2) {
            for (j = 0; j < chunk; j++)
                arr1[j] = arr2[j];
            for (k = 0; j < i2; k++)
                arr2[k] = arr2[j++];
        } else {
            for (k = 0; j < i2; k++)
                arr1[k] = arr2[j++];
            for (; k < chunk; k++) {
                if (j >= m || tmp[i] <= arr2[j])
                    arr1[k] = tmp[i++];
                else
                    arr1[k] = arr2[j++];
            }
            k = 0;
        }
        for (; i < chunk; k++) {
            if (j >= m || tmp[i] <= arr2[j])
                arr2[k] = tmp[i++];
            else
                arr2[k] = arr2[j++];
        }
        i2 = k;
    }
}

Reducing the temporary storage size slows down execution time almost linearly: merge_chrlie2 is still 100x faster than the posted code with a local array of 12 elements, which can hardly be considered extra storage.

I ran a benchmark in C, using the no space merge function as the merge phase of a classic top down recursive merge sort. Here are the timings(*) with a temporary array of 128 elements:

        size        malloc      chqrlie3      chqrlie2       chqrlie        shivam
        1000         0.059         0.064         0.064         0.252         2.010
        2000         0.153         0.124         0.126         0.836         7.872
        5000         0.441         0.505         0.528         5.218        49.947
       10000         0.667         0.769         0.898        19.850       205.316
       20000         1.531         1.917         2.195        85.185       812.061
       50000         4.036         6.123         9.244       524.873      5197.808
      100000         8.281        15.466        28.787      2064.165     20485.584
      200000        18.030        51.438       106.391      8342.140     82226.825
      500000        49.201       246.224       557.470     51982.830    511418.600
     1000000       112.594       915.575      2171.953    207096.552   2053797.858
     2000000       215.045      3883.104      8476.783    829806.868   8153974.589
     5000000       565.701     34142.299     67304.217   5855744.544  51565024.699

And here are the timings with a temporary array of just 5 elements:

        size        malloc      chqrlie3      chqrlie2       chqrlie        shivam
        1000         0.055         0.089         0.111         0.230         1.963
        2000         0.165         0.247         0.327         0.880         7.891
        5000         0.438         1.309         1.914         4.971        50.376
       10000         0.799         2.832         5.675        21.544       202.929
       20000         1.589         9.265        23.528        82.582       826.768
       50000         4.150        53.408       131.302       519.007      5089.592
      100000         8.375       205.644       533.308      2016.670     20431.584
      200000        17.291       797.865      2193.575      9536.996     82308.875
      500000        61.955      6565.826     15626.427     50813.910    508269.938
     1000000       105.836     21146.977     52530.060    205640.244   2036022.030

(*) timings in milliseconds, extrapolated for chqrlie and shivam for arrays larger than 50k and 200k respectively.

chqrlie
  • 131,814
  • 10
  • 121
  • 189
0

Since you can't use any extra space, you can implement a kind of selection sort of both these arrays at once n + m, checking each index along the way, whether it is already in the second array, or not:

public static void main(String[] args) {
    int[] arr1 = {1, 3, 5, 7};
    int[] arr2 = {0, 2, 6, 8, 9};

    selectionSort(arr1, arr2);

    System.out.println(Arrays.toString(arr1)); // [0, 1, 2, 3]
    System.out.println(Arrays.toString(arr2)); // [5, 6, 7, 8, 9]
}
public static void selectionSort(int[] arr1, int[] arr2) {
    int n = arr1.length;
    int m = arr2.length;
    for (int i = 0; i < n + m; i++) {
        int min = i < n ? arr1[i] : arr2[i - n];
        int min_i = i;
        for (int j = i + 1; j < n + m; j++)
            if (j < n ? arr1[j] < min : arr2[j - n] < min) {
                min = j < n ? arr1[j] : arr2[j - n];
                min_i = j;
            }
        if (i != min_i)
            if (i < n) {
                int temp = arr1[i];
                if (min_i < n) {
                    arr1[i] = arr1[min_i];
                    arr1[min_i] = temp;
                } else {
                    arr1[i] = arr2[min_i - n];
                    arr2[min_i - n] = temp;
                }
            } else {
                int temp = arr2[i - n];
                if (min_i < n) {
                    arr2[i - n] = arr1[min_i];
                    arr1[min_i] = temp;
                } else {
                    arr2[i - n] = arr2[min_i - n];
                    arr2[min_i - n] = temp;
                }
            }
    }
}

See also: Java Selection Sort