2

I am learning multicore programming in C# using TPL. Came up with a problem which I felt could be a easy to work with and one which could use multicore to make it faster. However on running the program, I find the single core implementation faster than multicore. Damn! What did I miss? Note that I am using the same PC.

Problem: Given a number find all factorials from 0 to that number.

My Solution: Since I have a 4 core CPU, i hardcoded my implementation to use 4 Tasks. The tasks evenly split out the numbers. If there are 100 numbers, each Task works on about 25 numbers each (Task 1 works with 0, 4, 8, 12... and Task 2 with 1, 4, 9, 13...).

Here is my solution (Max input number it seems to accept is 9999)

public class Factorial
{
    public int TotalNoOfElements { get; set; }

    public BigInteger GetFactorial(BigInteger number)
    {
        if (number == 0)
            return 0;

        if (number == 1)
            return 1;

        return number * GetFactorial(number - 1);
    }

    public BigInteger[] GetFactorialsUsingSingleCore(int number)
    {
        BigInteger[] factorials = new BigInteger[number + 1];

        factorials[0] = 0;
        for (int index = 1; index <= number; index++)
        {
            Factorial f = new Factorial();      //TODO: remove. Throws stackoverflow exception @ GetFactorial() if local method is used directly.
            factorials[index] = f.GetFactorial(index);
        }

        return factorials;
    }

    public BigInteger[] GetFactorialsForNumbers(int[] numbers)
    {
        BigInteger[] factorials = new BigInteger[TotalNoOfElements + 1];
        foreach (int item in numbers)
        {
            Factorial f = new Factorial();      //TODO: remove. Throws stackoverflow exception @ GetFactorial() if local method is used directly.
            factorials[item] = f.GetFactorial(item);
        }
        return factorials;
    }

    public BigInteger[] GetFactorialsUsingMultiCores(int number)
    {
        //int noOfCores = 4;
        StopWatchHelper stopWatchHelper = new StopWatchHelper();
        stopWatchHelper.StartWatch();

        BigInteger[] factorials = new BigInteger[TotalNoOfElements + 1];
        List<int[]> splitNumbersList = SplitNumbersByTasks(number);
        stopWatchHelper.StopWatch();
        stopWatchHelper.ShowElapsedTimeInConsole("Creating list...");

        Task<BigInteger[]> t1 = Task.Factory.StartNew(
                        () =>
                        GetFactorialsForNumbers(splitNumbersList[0])
                        );

        Task<BigInteger[]> t2 = Task.Factory.StartNew(
                        () =>
                        GetFactorialsForNumbers(splitNumbersList[1])
                        );

        Task<BigInteger[]> t3 = Task.Factory.StartNew(
                        () =>
                        GetFactorialsForNumbers(splitNumbersList[2])
                        );

        Task<BigInteger[]> t4 = Task.Factory.StartNew(
                        () =>
                        GetFactorialsForNumbers(splitNumbersList[3])
                        );

        Task[] tasks = { t1, t2, t3, t4 };
        Task.WaitAll(tasks);

        stopWatchHelper.StartWatch();
        //Consolidate result arrays into 1 single array
        foreach (Task<BigInteger[]> task in tasks)
        {
            BigInteger[] result = task.Result;
            for (int index = 0; index < result.Length; index++)
            {
                if(result[index] != 0)
                    factorials[index] = result[index];
            }
        }
        stopWatchHelper.StartWatch();
        stopWatchHelper.ShowElapsedTimeInConsole("Consolidating results...");

        return factorials;
    }

    //Hardcoded for now
    //Returns #ofArrays = InputNumber/TotalTasks such that the work is evenly divided to the Tasks
    //Given an input number, we shall split it evenly across different Tasks.
    //With input as 25, Task 1 gets -> 0, 4, 8, 12, ... and Task 2 gets 1, 5, 9, 13, ...
    List<int[]> SplitNumbersByTasks(int number)
    {
        int noOfCores = 4;
        List<int[]> splitNumbersList = new List<int[]>();

        int reminder = number % noOfCores;
        for (int i = 0; i < noOfCores; i++)
        {
            if (reminder == 0)
            {
                splitNumbersList.Add(new int[(number / noOfCores)]);
            }
            else
            {
                splitNumbersList.Add(new int[(number / noOfCores) + 1]);
                reminder--;
            }
        }

        int arrayIndex = 0;
        for (int index = 1; index <= number; index++)
        {
            splitNumbersList[0][arrayIndex] = index++;
            if (index > number) break;

            splitNumbersList[1][arrayIndex] = index++;
            if (index > number) break;

            splitNumbersList[2][arrayIndex] = index++;
            if (index > number) break;

            splitNumbersList[3][arrayIndex] = index;

            ++arrayIndex;
        }

        return splitNumbersList;
    }
}

Helper Class:

class StopWatchHelper
{
    Stopwatch stopWatch = new Stopwatch();

    public void StartWatch()
    {
        stopWatch.Reset();
        stopWatch.Start();
    }

    public void StopWatch()
    {
        stopWatch.Stop();
    }

    public void ShowElapsedTimeInConsole(string message)
    {
        Console.WriteLine(message);
        Console.WriteLine(new string('-', message.Length));
        Console.WriteLine("Minutes: " + stopWatch.Elapsed.Minutes);
        Console.WriteLine("Seconds: " + stopWatch.Elapsed.Seconds);
        Console.WriteLine("Milliseconds: " + stopWatch.Elapsed.Milliseconds);
        Console.WriteLine();
    }
}

Main:

class Program
{
    static void Main(string[] args)
    {
        int input;
        string strInput;
        bool isNumber;
        StopWatchHelper helper = new StopWatchHelper();

        Factorial factorial = new Factorial();
        //Get input recursively until input is not a number
        do
        {
            strInput = Console.ReadLine();
            Console.WriteLine();

            isNumber = Int32.TryParse(strInput, out input);
            factorial.TotalNoOfElements = input;
            if (isNumber)
            {
                helper.StartWatch();
                BigInteger[] factorialsUsingMultiCore = factorial.GetFactorialsUsingMultiCores(input);
                helper.StopWatch();
                helper.ShowElapsedTimeInConsole("Factorials computed using Multicore");

                helper.StartWatch();
                BigInteger[] factorialsUsingSingleCore = factorial.GetFactorialsUsingSingleCore(input);
                helper.StopWatch();
                helper.ShowElapsedTimeInConsole("Factorials computed using Singlecore");
            }

            Console.WriteLine();

        } while (isNumber);
    }
}
ShaQ.Blogs
  • 1,264
  • 4
  • 17
  • 30
  • 1
    I don't think 4 c# tasks will be executed on 4 cores necessarily. They can share one core, can't they? – Alex Jun 09 '16 at 15:43
  • Task manager shows all cores increasing activity from 5% to 25%. I am presuming it is using multicores. – ShaQ.Blogs Jun 09 '16 at 15:48
  • Tasks aren't executed in different threads always, the taskscheduler can decide to execute them on the same thread, so this can be the cause, take a look at this: http://stackoverflow.com/questions/13570579/is-it-possible-always-to-force-a-new-thread-with-task Also, if you want to ensure new threads, use the old way, just use the threadpool. – Gusman Jun 09 '16 at 15:51
  • 1
    How much faster? Its possible that the overhead of maintaining multiple threads is more that the savings of running in parallel. Try increasing the "size" of your tasks. – D Stanley Jun 09 '16 at 15:57
  • I presume by size you meant make the number larger. As I increase my number in multiples of 1000, the difference in time becomes larger. Single core being faster. – ShaQ.Blogs Jun 09 '16 at 16:08
  • I used this to get thread ID from within the Task, Thread.CurrentThread.ManagedThreadId. It always shows 4 different ID's for the 4 Tasks. So this is running in 4 different threads, Correct? – ShaQ.Blogs Jun 09 '16 at 16:09
  • 1
    You have a column of a hundred numbers you want to sum. What is faster: summing the numbers, or hiring ten workers, teaching them how to add, dividing up the numbers into ten buckets of ten, assigning a worker to each bucket, and then adding up those ten sums they give you? **Parallelism is expensive**. – Eric Lippert Jun 09 '16 at 16:15
  • 1
    The code is excessively expensive, recursion forces a *lot* of BigInteger copies to be made, they do not come for free. Right now the tasks are serialized by the heap lock and there is no concurrency. You can get ahead by using an app.config file that uses the `gcServer` element, makes the multicore version about twice as fast. – Hans Passant Jun 09 '16 at 16:18
  • @Eric: Any links on the same that I can read more. – ShaQ.Blogs Jun 09 '16 at 16:46
  • It seems like your benchmarking might be flawed. You're calling the multi-core code first and then the single core. What do your timings look like if you call the single core first and then the multi-core? Maybe JITing contributes to the increase in time for the multi-core code ? – Chris Dunaway Jun 09 '16 at 19:00
  • With Input as 5000, Running Multicore first I get 1m 08s for Multicore, and 33s for singlecore. Running singlecore first I get 34s for singlecore, and 1m 07s for multicore. So not much of a difference. – ShaQ.Blogs Jun 09 '16 at 20:01

0 Answers0