4

I have a small program which I am using for algorithmic stock trading. The code has to loop about 192 trillion times on my 8-core desktop machine. I thought about renting out a 64 core machine to run this, but it wasn't cost effective.

It's just this bit of code. But the for loops has to loop on every bar to be calculated (about 1.8million) and then the list it loops through to check for a match is about 800k items.

The only way I could think of to speed it up for now, is to remove the matched item, as it only happens once (DateTime).

Does anyone else have a way to speed this code up a bit faster? It's taking my desktop beast about 45 hours to run through one iteration of the program.

Basically what I am doing is calculating on every bar, looking for to see if the current bar DateTime matches a DateTime I have in a CSV file that I created by hand. Then from the list Object, I grab the trade direction and set a bool to take a position.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using PowerLanguage.Function;
using ATCenterProxy.interop;
using System.IO;
using System.IO.Compression;

namespace PowerLanguage.Strategy
{
    public class Ecal_v1 : SignalObject
    {
        public List<Trades> tradeList = new List<Trades>();
        public List<string> csvList = new List<string>();
        public bool exitOn24 = false;
        public string ecalPath = @"C:\Users\Skynet\OneDrive\Trading\Economic Calendars\backtest1.csv";
        PowerLanguage.Indicator.Bollinger_Bands bb;

        public Ecal_v1(object _ctx):base(_ctx){}

        //[Input]
        //public bool exitOn24 { get; set; }

        [Input]
        public double bbTopOffset { get; set; }
        775
        [Input]
        public double bbBotOffset { get; set; }

        [Input]
        public double longTPMod { get; set; }

        [Input]
        public double shortTPMod { get; set; }

        [Input]
        public double longSLMod { get; set; }

        [Input]
        public double shortSLMod { get; set; }

        //[Input]
        //public double buyTrail { get; set; }

        //[Input]
        //public double sellTrail { get; set; }

        double bbUpperDiff;
        double bbLowerDiff;
        double bbBasis;
        double longTP;
        double shortTP;
        double longSL;
        double shortSL;
        double ptValue;
        public DateTime tradeTime;

        private IOrderMarket longEntry, shortEntry, longExit, shortExit;

        protected override void Create()
        {
            // create variable objects, function objects, order objects etc.
            bb = ((PowerLanguage.Indicator.Bollinger_Bands)AddIndicator("Bollinger_Bands"));

            longEntry = OrderCreator.MarketNextBar(new SOrderParameters(Contracts.Default, EOrderAction.Buy));
            shortEntry = OrderCreator.MarketNextBar(new SOrderParameters(Contracts.Default, EOrderAction.SellShort));
            longExit = OrderCreator.MarketNextBar(new SOrderParameters(Contracts.Default, EOrderAction.Sell));
            shortExit = OrderCreator.MarketNextBar(new SOrderParameters(Contracts.Default, EOrderAction.BuyToCover));
        }

        protected override void StartCalc()
        {
            // assign inputs 
            GetEcal();
            ptValue = Bars.Point;

            longTP = longTPMod;
            longSL = longSLMod;
            shortTP = shortTPMod;
            shortSL = shortSLMod;

        }

        protected override void CalcBar()
        {
            bool LE = false;
            bool SE = false;
            bool LX = false;
            bool SX = false;

            for(int i=0; i<tradeList.Count; i++)
            {
                if(Bars.Time[0] == tradeList.ElementAt(i).time)
                {
                    if (tradeList.ElementAt(i).direction == "Up")
                    {
                        LE = true;
                        tradeList.RemoveAt(i);
                    }
                    else if (tradeList.ElementAt(i).direction == "Down")
                    {
                        SE = true;
                        tradeList.RemoveAt(i);
                    }
                    else
                    {

                    }
                }
            }

            if(exitOn24 == true)
            {
                if (Bars.Time[0] > tradeTime.AddHours(24))
                {
                    LX = true;
                    SX = true;
                }
            }

            if (StrategyInfo.MarketPosition == 0)
            {
                if (LE)
                {
                    longEntry.Send();
                    tradeTime = Bars.Time[0];
                    setLongStops();     
                }
                else if (SE)
                {
                    shortEntry.Send();
                    tradeTime = Bars.Time[0];
                    setShortStops();        
                }
            }

            else if (StrategyInfo.MarketPosition > 0)
            {
                if (LX)
                {
                    longExit.Send();
                }
                else if (LE)
                {
                    longEntry.Send();
                    tradeTime = Bars.Time[0];
                    setLongStops();
                }
                else
                {
                    CurSpecOrdersMode = ESpecOrdersMode.PerPosition;
                    GenerateStopLossPt(longSL);
                    GenerateProfitTargetPt(longTP);
                    //GenerateTrailingStopPt(buyTrail);
                }
            }

            else if (StrategyInfo.MarketPosition < 0)
            {
                if (SX)
                {
                    shortExit.Send();
                }
                else if (SE)
                {
                    shortEntry.Send();
                    tradeTime = Bars.Time[0];
                    setShortStops();
                }
                else
                {
                    CurSpecOrdersMode = ESpecOrdersMode.PerPosition;
                    GenerateStopLossPt(shortSL);
                    GenerateProfitTargetPt(shortTP);
                    //GenerateTrailingStopPt(sellTrail);
                }
            }
        }

        private void GetEcal()
        {
            csvList = File.ReadAllLines(ecalPath).Skip(1).ToList();
            foreach(string line in csvList)
            {
                string[] values = line.Split(',');
                tradeList.Add(new Trades { time = Convert.ToDateTime(values[0]), direction = values[1] });
            }
        }
    }


    public class Trades
    {
        public DateTime time { get; set; }
        public string direction { get; set; }
    }


}

The culprit of the slowdown is the For Loop inside the CalcBar() method.

Uwe Keim
  • 39,551
  • 56
  • 175
  • 291
Cnote
  • 65
  • 6
  • maybe it's faster when you use a switch on `tradeList.ElementAt(i).direction`. – DrNachtschatten May 26 '17 at 08:02
  • 2
    Use a dictionary or a sorted tree keyed on `time` for trades instead of a list. Dictionary lookups are amortized O(1) and tree lookups are O(lgN). Here, that'll be $1M for consultation - surely one millionth of a cent for each loop iteration is not excessive :) – Anton Tykhyy May 26 '17 at 08:07
  • 1
    You may try with a [Parallel.For](https://msdn.microsoft.com/en-us/library/system.threading.tasks.parallel.for(v=vs.110).aspx) – Pikoh May 26 '17 at 08:07
  • 1
    Your `GetEcal` function is slow because you're reading the entire file into memory (`ReadAllLines`) then copying the array twice due to `ToList()`. Then using `String.Split` also wastes memory unnecessarily - you should use a Finite-state-machine based CSV parser - that will significantly cut down on memory usage and runtime. – Dai May 26 '17 at 08:10
  • 1
    Indexing! Indexing! Indexing! Indexing. Especially if the trade list doesn't change often. – Euphoric May 26 '17 at 08:11
  • Also, as @Pikoh also pointed out there is no parallelism in your code - it looks entirely single-threaded, so adding more processors cores to the host system won't help you at all. – Dai May 26 '17 at 08:12
  • I should have been more clear regarding the parallelism. I am using MultiCharts.net as a trading platform, and the method CalcBar() is run parallel after the code has been compiled. What I am trying to do is optimize my stoplosses and take profits levels. So it runs through 1000's of possible combinations. This is the part that is parallel, because it tries 8 different combinations of the [Inputs] simultaneously. – Cnote May 26 '17 at 15:51
  • Thanks for all the ideas everyone! I will implement and update everyone on the factors of improvement in the next few hours. – Cnote May 26 '17 at 16:28

4 Answers4

7

Have you tried to profile this method? We have too little information. For example maybe the most costly operation is

Bars.Time[0] == tradeList.ElementAt(i).time

We don't know that. You should profile it first.

What's next...

tradeList.ElementAt(i).direction == "Up"

Don't use strings. Strings are slow. You can use here enums, which will be optimized to integers and integers comparison is far more faster than strings.

Don't use ElementAt method. Use just [] operator. It's faster.

Consider using Dictionary instead of List. It's much more faster than list. List has to go thru EVERY element to find what you need. Dictionary don't. This maybe really critical part here.

Consider using integers instead of dateTimes. Treat integers as seconds. It will be faster than DateTime.

And use Parallel.ForEach instead of ordinary for. It will then use other cores. Ordinary for probably uses just one core.

Oh, one more thing. If it's stock application, maybe you could try using Neural Network? But that's a completely different story.

Adam Jachocki
  • 1,897
  • 1
  • 12
  • 28
  • This is very helpful. I can use 1/-1 for the trade direction rather than strings "Up" & "Down". I didn't realize ElementAt was so much slower than just []. I think you're right about using a dictionary. I knew there had to be a way to just search for the matching DateTIme value, instead of iterating through each one in the list to find a match. The method CalcBar() is automatically called parallel by the trading software, so it does in fact use all 8 cores. Also, I am using a Neural Network, but just to create the CSV file that I get my trade info from. – Cnote May 26 '17 at 15:45
  • Sorry it took me so long to respond to this, but it took me a while to optimize and implement. The details of this answer allowed me to speed up the processing by a factor of 5. – Cnote Jun 16 '17 at 15:51
6
  • RemoveAt will process the rest of the list to shift every item after the one you removed down one place. see here. This has a huge cost in your case.

    The solution is to use a temporary list in which you add the elements you will remove later, out of the loop (sourceList.Except(removedList)) ; OR just mark somehow your items as removed and never touch the source list.

  • You are loading ALL the lines of your CSV in memory just to read them and create a strongly typed object out of every line.

    You could read the file line-by-line instead, and create your objects.

  • ElementAt could be slower than the indexer. Since you are using lists, just access items with [] to avoid doubts.

  • To use less memory and have faster comparisons, make direction an enum with 'Up' 'Down' values.

You don't take advantage of many cores if you don't parallelize the code. Once you got the thigs right, if the program still takes hours, you can try Parallel.For instead of for. In that case the 'mark an item as removed solution' is simplier and proably more performant than using a concurrent list and feed it with the items to remove.

Mauro Sampietro
  • 2,739
  • 1
  • 24
  • 50
2

For large lists a Hash Set is often a good way to enable better performance. Some more information is here:

https://softwareengineering.stackexchange.com/questions/280361/list-comparing-techniques-for-faster-performance

Or alternatively why not use a dictionary and use the DateTime as your key (you can use just any other type as a dummy value if you don't need to store any more details)

Then, you are effectively matching based on the key, so you either hit or miss.

jason.kaisersmith
  • 8,712
  • 3
  • 29
  • 51
-1

My suggestion is to optimize your process by breaking it down. First, you check:

Bars.Time[0] == tradeList.ElementAt(i).time;

Tom utilizes this to add this to a LINQ statement to filter only those where your condition is met using:

tradeList.Where(t => t.time == Bars.Time[0]);

Now you have another set of if conditions that control if you remove the item:

tradeList.ElementAt(i).direction == "Down" || tradeList.ElementAt(i).direction == "Up";

These can be simplified further using LINQ into:

tradeList.RemoveAll(d => d.direction == "Down" || d => d.Direction == "Up");

Now you can call the RemoveAll method after you Filter using Tom's technique:

tradeList.Where(t => t.time == Bars.Time[0])
    .RemoveAll(d => d.direction == "Up" || d => d.direction == "Down");

This statement for all intents and purposes is working the same as yours, it iterates over the list using a foreach loop. But now we can optimize this using PLINQ. Alright so moving right along with PLINQ, you would alter this statement as:

tradeList.AsParallel().tradeList.Where(t => t.time != Bars.Time[0] 
    && (d => d.direction != "Up" || d => d.direction != "Down"));

I combined the logic from the RemoveAll() into the Where() method, this statement should give you a list of all those bars that shouldn't be removed. Now I'm not sure what was the purpose of the bool flags (LE and SE) you have, but they will become true after the first hit, so there is a better way to do that. But this should get you someplace to start.

Huda Syed
  • 1
  • 2
  • you are telling him something like: ok you are doing things the worst way possible not understanding the cost and meaning of each line of code you write, but you can write them more concisely and parallelize them – Mauro Sampietro May 26 '17 at 11:39
  • Hi Huda, I was under the impression that LINQ is more costly than for() loops. Am I incorrect? – Cnote May 26 '17 at 15:42