this is a continuation of this post. I must calculate many times some statistics(Max, mean, min, median and std dev) of arrays and I have a performance issue given the sort of my arrays in the method calcMaxMinMedian.
Given, I could not improve much further the summary statistics of an array performance. I am trying now to understand strategies and work arounds to parallelize my upper calls or any other smart thoughts.
I have seen this doc but I am not familiar As well as this (post)[https://stackoverflow.com/questions/20375176/should-i-always-use-a-parallel-stream-when-possible/20375622].
I tried using parallel streams, however probably given my SharedResource, the actual performance using the for loop was worse.
Time (s) functionUsingForLoop173
Time (s) functionUsingParallelStream194
Do anyone have an idea of what could I try to parallelize or any other thoughts on how to improve the overrall performance? Here is what I tried:
public class MaxMinMedianArrayUtils {
int[] sharedStaticResource={1,5,5};//Shared resource across
/**
* Return an array with summary statistics. Max, mean,std dev,median,min.
* Throw an IllegalArgumentException if array is empty.
*
* @param a array.
* @return array returning Max(0), mean(1),std dev(2),median(3),min(4) in
* respective
* positions.
*/
public static double[] getSummaryStatistics(double[] a) {
double[] summary = new double[5];
if (a.length == 0) {
throw new IllegalArgumentException(
"Array is empty, please " + "verify" + " the values.");
} else if (a.length == 1) {
summary[0] = a[0];
summary[1] = a[0];
summary[2] = 0;
summary[3] = a[0];
summary[4] = a[0];
} else {
double[] meandStd = calcMeanSDSample(a);
summary[1] = meandStd[0];//Mean
summary[2] = meandStd[1];//Standard Deviation
double[] maxMinMedian = calcMaxMinMedian(a);
summary[0] = maxMinMedian[0];//Max
summary[4] = maxMinMedian[1];//Min
summary[3] = maxMinMedian[2];//Median
}
return summary;
}
public static double[] calcMeanSDSample(double numArray[]) {
int length = numArray.length;
double[] meanStd = new double[2];
if (length == 0) {
throw new IllegalArgumentException(
"Array is empty, please " + "verify" + " the values.");
} else if (length == 1) {
meanStd[0] = numArray[0];
meanStd[1] = 0.0;
} else {
double sum = 0.0, standardDeviation = 0.0;
for (double num : numArray) {
sum += num;
}
meanStd[0] = sum / length;
for (double num : numArray) {
standardDeviation += Math.pow(num - meanStd[0], 2);
}
meanStd[1] =
Math.sqrt(standardDeviation / ((double) length - 1.0));//-1
// because it is
// for sample
}
return meanStd;
}
public static double[] calcMaxMinMedian(double[] a) {
double[] maxMinMedian = new double[3];
if (a.length == 0) {
throw new IllegalArgumentException(
"Array is empty, please " + "verify" + " the values.");
} else if (a.length == 1) {
for (int i = 0; i < 3; i++) {
maxMinMedian[i] = a[0];
}
} else {
Arrays.sort(a);
maxMinMedian[0] = a[a.length - 1];
maxMinMedian[1] = a[0];
maxMinMedian[2] = (a.length % 2 != 0) ? (double) (a[a.length / 2]) :
(double) ((a[(a.length - 1) / 2] + a[a.length / 2]) / 2.0);
}
return maxMinMedian;
}
public static void main(String[] args) {
int numVals = 1000;
// double[] ar = new double[numVals];
int numCalculations = 2 * 1000 * 1 * 1000;
// int numCalculations = 2 * 1000;
MaxMinMedianArrayUtils maxMinMedianArrayUtils=
new MaxMinMedianArrayUtils();
Instant start = Instant.now();
double[][] statsPerCalculation=
maxMinMedianArrayUtils.functionUsingForLoop(numVals,
numCalculations);
Instant end = Instant.now();
long totalTime = Duration.between(start, end).toSeconds();
System.out.println("Time (s) functionUsingForLoop" + totalTime);
Instant start3 = Instant.now();
double[][] statsPerCalculation3=
maxMinMedianArrayUtils.functionUsingParallelStream(numVals,
numCalculations);
Instant end3 = Instant.now();
long totalTime3 = Duration.between(start3, end3).toSeconds();
System.out.println("Time (s) functionUsingParallelStream" + totalTime3);
}
private double[][] functionUsingForLoop(int numVals,
int numCalculations) {
// calculations that is used to get some values, but is not modified.
double[][] statsPerCalculation= new double[numCalculations][5];//Each
// line
// stores
// the stats of the array generated in the numCalculations loop
for (int i = 0; i < numCalculations; i++) {//Complete independent
// calculations that I want to parallelize
double[]array=functionSimulateCalculations(numVals);
double[] stats = getSummaryStatistics(array);
for(int s = 0; s < stats.length; s++) {//Copy
statsPerCalculation[i][s] = stats[s];
}
}
return statsPerCalculation;
}
private double[][] functionUsingParallelStream(int numVals,
int numCalculations) {
// calculations that is used to get some values, but is not modified.
double[][] statsPerCalculation= new double[numCalculations][5];//Each
// line
// stores
// the stats of the array generated in the numCalculations loop
double[][] finalStatsPerCalculation = statsPerCalculation;
IntStream.range(0,numCalculations).parallel().forEach((i)->{
double[] array=functionSimulateCalculations(numVals);
double[] stats = getSummaryStatistics(array);
for(int s = 0; s < stats.length; s++) {
finalStatsPerCalculation[i][s] = stats[s];
}
}
);
return statsPerCalculation;
}
private double[] functionSimulateCalculations(int numVals) {
double[] ar=new double[numVals];
for (int k = 0; k < numVals; k++) {//To simulate the
// actual function of my
// use case
ar[k] = Math.random()*sharedStaticResource[0];
}
return ar;
}
} // Utility