6

I am refactoring an application and trying to add an asynchronous version of an existing function to improve performance times in an ASP.NET MVC application. I understand that there is an overhead involved with asynchronous functions, but I expected that with enough iterations, the I/O intensive nature of loading the data from the database would more than compensate for the overhead penalty and that I would receive significant performance gains.

The TermusRepository.LoadByTermusId function loads data by retrieving a bunch of datatables from the database (using ADO.NET and the Oracle Managed Client), populates a model, and returns it. TermusRepository.LoadByTermusIdAsync is similar, except it does so asynchronously, with a slightly different method of loading up datatable download tasks when there's multiple datatables to retrieve.

public async Task<ActionResult> AsyncPerformanceTest()
{
    var vm = new AsyncPerformanceTestViewModel();
    Stopwatch watch = new Stopwatch();
    watch.Start();
    for (int i = 0; i < 60; i++)
    {
        TermusRepository.LoadByTermusId<Termus2011_2012EndYear>("1");
        TermusRepository.LoadByTermusId<Termus2011_2012EndYear>("5");
        TermusRepository.LoadByTermusId<Termus2011_2012EndYear>("6");
        TermusRepository.LoadByTermusId<Termus2011_2012EndYear>("7");
    }
    watch.Stop();
    vm.NonAsyncElapsedTime = watch.Elapsed;
    watch.Reset();
    watch.Start();
    var tasks = new List<Task<Termus2011_2012EndYear>>();
    for (int i = 0; i < 60; i++)
    {
        tasks.Add(TermusRepository.LoadByTermusIdAsync<Termus2011_2012EndYear>("1"));
        tasks.Add(TermusRepository.LoadByTermusIdAsync<Termus2011_2012EndYear>("5"));
        tasks.Add(TermusRepository.LoadByTermusIdAsync<Termus2011_2012EndYear>("6"));
        tasks.Add(TermusRepository.LoadByTermusIdAsync<Termus2011_2012EndYear>("7"));               
    }
    await Task.WhenAll(tasks.ToArray());
    watch.Stop();
    vm.AsyncElapsedTime = watch.Elapsed;            
    return View(vm);
}

public static async Task<T> LoadByTermusIdAsync<T>(string termusId) where T : Appraisal
{
    var AppraisalHeader = new OracleCommand("select tu.termus_id, tu.manager_username, tu.evaluee_name, tu.evaluee_username, tu.termus_complete_date, termus_start_date, tu.termus_status, tu.termus_version, tn.managername from tercons.termus_users tu left outer join tercons.termus_names tn on tu.termus_id=tn.termus_id where tu.termus_id=:termusid");
    AppraisalHeader.BindByName = true;
    AppraisalHeader.Parameters.Add("termusid", termusId);
    var dt = await Database.GetDataTableAsync(AppraisalHeader);
    T Termus = Activator.CreateInstance<T>();
    var row = dt.AsEnumerable().Single();
    Termus.TermusId = row.Field<decimal>("termus_id").ToString();
    Termus.ManagerUsername = row.Field<string>("manager_username");
    Termus.EvalueeUsername = row.Field<string>("evaluee_username");
    Termus.EvalueeName = row.Field<string>("evaluee_name");
    Termus.ManagerName = row.Field<string>("managername");
    Termus.TERMUSCompleteDate = row.Field<DateTime?>("termus_complete_date");
    Termus.TERMUSStartDate = row.Field<DateTime>("termus_start_date");
    Termus.Status = row.Field<string>("termus_status");
    Termus.TERMUSVersion = row.Field<string>("termus_version");
    Termus.QuestionsAndAnswers = new Dictionary<string, string>();

    var RetrieveQuestionIdsCommand = new OracleCommand("select termus_question_id from tercons.termus_questions where termus_version=:termus_version");
    RetrieveQuestionIdsCommand.BindByName = true;
    RetrieveQuestionIdsCommand.Parameters.Add("termus_version", Termus.TERMUSVersion);
    var QuestionIdsDt = await Database.GetDataTableAsync(RetrieveQuestionIdsCommand);
    var QuestionIds = QuestionIdsDt.AsEnumerable().Select(r => r.Field<string>("termus_question_id"));

    //There's about 60 questions/answers, so this should result in 60 calls to the database. It'd be a good spot to combine to a single DB call, but left it this way so I could see if async would speed it up for learning purposes.
    var DownloadAnswersTasks = new List<Task<DataTable>>();
    foreach (var QuestionId in QuestionIds)
    {
        var RetrieveAnswerCommand = new OracleCommand("select termus_response, termus_question_id from tercons.termus_responses where termus_id=:termus_id and termus_question_id=:questionid");
        RetrieveAnswerCommand.BindByName = true;
        RetrieveAnswerCommand.Parameters.Add("termus_id", termusId);
        RetrieveAnswerCommand.Parameters.Add("questionid", QuestionId);
        DownloadAnswersTasks.Add(Database.GetDataTableAsync(RetrieveAnswerCommand));
    }
    while (DownloadAnswersTasks.Count > 0)
    {
        var FinishedDownloadAnswerTask = await Task.WhenAny(DownloadAnswersTasks);
        DownloadAnswersTasks.Remove(FinishedDownloadAnswerTask);
        var AnswerDt = await FinishedDownloadAnswerTask;
        var Answer = AnswerDt.AsEnumerable().Select(r => r.Field<string>("termus_response")).SingleOrDefault();
        var QuestionId = AnswerDt.AsEnumerable().Select(r => r.Field<string>("termus_question_id")).SingleOrDefault();
        if (!String.IsNullOrEmpty(Answer))
        {
            Termus.QuestionsAndAnswers.Add(QuestionId, System.Net.WebUtility.HtmlDecode(Answer));
        }
    }
    return Termus;
}

public static async Task<DataTable> GetDataTableAsync(OracleCommand command)
{
    DataTable dt = new DataTable();
    using (var connection = GetDefaultOracleConnection())
    {
        command.Connection = connection;
        await connection.OpenAsync();
        dt.Load(await command.ExecuteReaderAsync());
    }
    return dt;
}

public static T LoadByTermusId<T>(string TermusId) where T : Appraisal
{
    var RetrieveAppraisalHeaderCommand = new OracleCommand("select tu.termus_id, tu.manager_username, tu.evaluee_name, tu.evaluee_username, tu.termus_complete_date, termus_start_date, tu.termus_status, tu.termus_version, tn.managername from tercons.termus_users tu left outer join tercons.termus_names tn on tu.termus_id=tn.termus_id where tu.termus_id=:termusid");
    RetrieveAppraisalHeaderCommand.BindByName = true;
    RetrieveAppraisalHeaderCommand.Parameters.Add("termusid", TermusId);
    var AppraisalHeaderDt = Database.GetDataTable(RetrieveAppraisalHeaderCommand);
    T Termus = Activator.CreateInstance<T>();
    var AppraisalHeaderRow = AppraisalHeaderDt.AsEnumerable().Single();
    Termus.TermusId = AppraisalHeaderRow.Field<decimal>("termus_id").ToString();
    Termus.ManagerUsername = AppraisalHeaderRow.Field<string>("manager_username");
    Termus.EvalueeUsername = AppraisalHeaderRow.Field<string>("evaluee_username");
    Termus.EvalueeName = AppraisalHeaderRow.Field<string>("evaluee_name");
    Termus.ManagerName = AppraisalHeaderRow.Field<string>("managername");
    Termus.TERMUSCompleteDate = AppraisalHeaderRow.Field<DateTime?>("termus_complete_date");
    Termus.TERMUSStartDate = AppraisalHeaderRow.Field<DateTime>("termus_start_date");
    Termus.Status = AppraisalHeaderRow.Field<string>("termus_status");
    Termus.TERMUSVersion = AppraisalHeaderRow.Field<string>("termus_version");
    Termus.QuestionsAndAnswers = new Dictionary<string, string>();

    var RetrieveQuestionIdsCommand = new OracleCommand("select termus_question_id from tercons.termus_questions where termus_version=:termus_version");
    RetrieveQuestionIdsCommand.BindByName = true;
    RetrieveQuestionIdsCommand.Parameters.Add("termus_version", Termus.TERMUSVersion);
    var QuestionIdsDt = Database.GetDataTable(RetrieveQuestionIdsCommand);
    var QuestionIds = QuestionIdsDt.AsEnumerable().Select(r => r.Field<string>("termus_question_id"));
    //There's about 60 questions/answers, so this should result in 60 calls to the database. It'd be a good spot to combine to a single DB call, but left it this way so I could see if async would speed it up for learning purposes.
    foreach (var QuestionId in QuestionIds)
    {
        var RetrieveAnswersCommand = new OracleCommand("select termus_response from tercons.termus_responses where termus_id=:termus_id and termus_question_id=:questionid");
        RetrieveAnswersCommand.BindByName = true;
        RetrieveAnswersCommand.Parameters.Add("termus_id", TermusId);
        RetrieveAnswersCommand.Parameters.Add("questionid", QuestionId);
        var AnswersDt = Database.GetDataTable(RetrieveAnswersCommand);
        var Answer = AnswersDt.AsEnumerable().Select(r => r.Field<string>("termus_response")).SingleOrDefault();
        if (!String.IsNullOrEmpty(Answer))
        {
            Termus.QuestionsAndAnswers.Add(QuestionId, System.Net.WebUtility.HtmlDecode(Answer));
        }
    }
    return Termus;
}

public static DataTable GetDataTable(OracleCommand command)
{
    DataTable dt = new DataTable();
    using (var connection = GetDefaultOracleConnection())
    {
        command.Connection = connection;
        connection.Open();
        dt.Load(command.ExecuteReader());
    }
    return dt;
}

public static OracleConnection GetDefaultOracleConnection()
{
    return new OracleConnection(ConfigurationManager.ConnectionStrings[connectionstringname].ConnectionString);
}

Results for 60 iterations are:

Non Async 18.4375460 seconds

Async     19.8092854 seconds

The results of this test are consistent. No matter how many iterations I go through of the for loop in AsyncPerformanceTest() action method, the async stuff runs about 1 second slower than the non-async. (I run the test multiple times in a row to account for the JITter warming up.) What am I doing wrong that's causing the async to be slower than the non-async? Am I misunderstanding something fundamental about writing asynchronous code?

mason
  • 31,774
  • 10
  • 77
  • 121
  • Do you compare `async` to single- or multiple-thread to conclude *I would receive significant performance gains*? – Sinatr Feb 16 '15 at 15:20
  • @Sinatr My expectation was for a single user accessing the site. If he were to make one request that ran my non-async code, I would expect that to perform several times slower than if he were to make a request to my async code. Ignoring the possibility of multiple users accessing the site at once. So I believe my answer to your question is single threaded. – mason Feb 16 '15 at 15:25
  • @mason I suggest you test the performance result using something like [JMeter](http://jmeter.apache.org/) to stress test your site. With single threaded access, it's logical that you'll see more overhead than gain. – Yuval Itzchakov Feb 16 '15 at 15:27
  • @mason There's no reason to believe that `async` would be faster in this case. Take a look at this question: [How to measure performance of awaiting asynchronous operations?](http://stackoverflow.com/q/23359829/885318) – i3arnon Feb 16 '15 at 15:30
  • @i3arnon You said in your answer on that question `To measure that you need to have a lot of async operations concurrently`, but I believe I have that. There's about 60 answers to retrieve, and I queue them all up into `DownloadAnswersTasks` and await them to return. My understanding is that the database operations get kicked off together, and then I await for each of those to return and process them as soon as they return. – mason Feb 16 '15 at 15:36
  • @mason Making the code asynchronous doesn't make it parallelized. And having the code be synchronous doesn't mean it can't be parallelized. See [this post](http://stackoverflow.com/a/23833635/1159478) for an explanation of the difference between asyncrhony and parallelism. – Servy Feb 16 '15 at 15:41
  • @Servy But my understanding of how async works is that when I await the tasks `var FinishedDownloadAnswerTask = await Task.WhenAny(DownloadAnswersTasks); ` at some point it will create the requests to the database at the same time, so that the actual database call runs in near-parallel (and there's about 60 calls to the DB right there, and that should be the most intensive part) and then I process those as soon as the previous one finishes a database call. So while async doesn't automatically make it parallel, shouldn't calls database calls (or web service calls) run largely in parallel? – mason Feb 16 '15 at 15:48
  • @mason 1) If one implementation is parallelized and another is serialized, then that has nothing to do with asynchrony; it is an entirely different property of each approach that has very little to do with whether they are asynchronous or synchronous. 2) That would *allow* the operations to be parallelized, but that doesn't necessarily mean that they will be. The network connection, database, or another aspect of the application could be serializing the operations in some way, preventing them from actually executing in parallel. – Servy Feb 16 '15 at 15:52
  • @mason Of course, in addition to all of that, you shouldn't be performing 60 DB queries in the first place. Just do a Join and use *one* query to fetch all of the data. That's going to be radically faster than any parallelized approach. – Servy Feb 16 '15 at 15:53
  • @Servy Yes, I realize 60 DB calls for the same query at the same time is a bit silly, but I figured doing it that way would give me a chance to try using async in order to gain a better understanding. I'm not quite *that* dumb ;) – mason Feb 16 '15 at 15:54
  • @Servy If I suspect that something would be serializing the operations in some way, where would I go to look for that? (I just updated my question to include the call to `GetDefaultOracleConnection()` which is the only code referenced that I didn't show. – mason Feb 16 '15 at 15:58
  • 260 "concurrent" tasks, why would you do that to yourself? – Peter Ritchie Feb 16 '15 at 16:03
  • @PeterRitchie Because I watched some videos where Microsoft employees demonstrated the use of async by making a bunch of I/O calls at the same time, and saw some impressive performance gains (10-25x faster) and I was hoping to create what they did in my own application. – mason Feb 16 '15 at 16:05
  • I don't know what you viewed; but it was likely showing asynchronous IO (that's not the same as any-old IO through asynchronous calls) I don't see where you're doing "async IO" here. – Peter Ritchie Feb 16 '15 at 16:09
  • @PeterRitchie In `var FinishedDownloadAnswerTask = await Task.WhenAny(DownloadAnswersTasks);`? That's taking a queue of asynchronous database calls and processing them as they finish, right? – mason Feb 16 '15 at 16:15
  • @mason, sure, but that doesn't mean that's "asynchronous IO". "asynchronous IO" is making use of *only* an IO thread pool thread that performs "overlapped IO" via the driver (network, filesystem, etc.), and not a worker thread pool thread to *wait* for the IO to complete. – Peter Ritchie Feb 16 '15 at 16:22
  • @mason: See the "not a silver bullet" section of my [async ASP.NET MSDN article](https://msdn.microsoft.com/en-us/magazine/dn802603.aspx) - for a single web/single db architecture, the web server is almost certainly not your bottleneck. Furthermore, last I checked, the default Oracle ADO.NET driver does not support async operations; you'd have to go third-party for that. – Stephen Cleary Feb 17 '15 at 02:58
  • @StephenCleary The built-in [System.Data.OracleClient](https://msdn.microsoft.com/en-us/library/system.data.oracleclient.aspx) is deprecated, and I haven't used that in a long time. Most people have moved to ODP.NET installed with the Oracle client, but I've moved to the [managed ODP.NET driver](https://www.nuget.org/packages/odp.net.managed/) since it's much easier to configure. That driver has async built in, and you can see in `GetDataTableAsync()` of my question that I make use of it. Thanks for the article- I think I'm going to be buying your book soon so I can get a better understanding. – mason Feb 17 '15 at 03:06
  • Ignore my last comment. It has async methods, but it's really running synchronous underneath. See my answer for more detail. – mason Mar 13 '15 at 14:32
  • @binki If you read the code again, I use ExecuteReaderAsync in the async methods, and ExecuteReader in the non-async methods. I provided examples of each. – mason Mar 19 '18 at 15:22

2 Answers2

11

The asynchronous version will always be slower than the synchronous version when there is no concurrency. It's doing all of the same work as the non-async version, but with a small amount of overhead added to manage the asynchrony.

Asynchrony is advantageous, with respect to performance, by allowing improved availability. Each individual request will be slower, but if you make 1000 requests at the same time, the asynchronous implementation will be able to handle them all more quickly (at least in certain circumstances).

This happens because the asynchronous solution allows the thread that was allocated to handle the request to go back to the pool and handle other requests, whereas the synchronous solution forces the thread to sit there and do nothing while it waits for the asynchronous operation to complete. There is overhead in structuring the program in a way that allows the thread to be freed up to do other work, but the advantage is the ability of that thread to go do other work. In your program there is no other work for the thread to go do, so it ends up being a net loss.

Servy
  • 202,030
  • 26
  • 332
  • 449
  • I disagree. The asynchronous implementation is still going to be slower for I/O bound processes. Since its the I/O thats doing the bounding. What you get is a perceived performance gain from a more responsive UI. – Aron Feb 16 '15 at 15:23
  • 1
    @Aron The UI is no more responsive with an asynchronous request handling for an ASP application. There are very real potential performance gains, but they come out of the ability of the thread pool threads to be re-used for other requests, which of course can't happen in this test as there are no other requests to be worked on. – Servy Feb 16 '15 at 15:25
  • 1
    @Aron And my point is that an ASP application doesn't *have* a message pump. It's an ASP application, not a desktop application. – Servy Feb 16 '15 at 15:28
  • @Servy So you're saying that if I had 1,000 users running the code at the same time, they would be much faster on the async code than the non-async code? I have trouble reconciling that in my mind with some of the demonstrations I've seen where a bunch of tasks had been queued up. I would expect `DownloadAnswersTasks` in `LoadByTermusIdAsync` to all run somewhat parallel (there's approximately 60 answers to retrieve, I'll update my question to point that out). – mason Feb 16 '15 at 15:29
  • 3
    @mason Asynchrony and parallelism are radically different concepts. Something being asynchronous doesn't mean it is parallelized, and something being synchronous doesn't mean its serialized. And of course parallelizing an operation isn't always necessarily quicker either, depending on the context. – Servy Feb 16 '15 at 15:31
  • In the typical ASP workload the largest percentage of time is used in waiting for the Database to respond (including latency). With a well optimised application, a single thread can easily run thousands if not millions of concurrent requests WITH A SINGLE THREAD. However even the most powerful thread based server would have trouble with a few thousand threads/requests. Async works best when the majority of your workload is latency based. – Aron Feb 16 '15 at 15:38
  • @Aron It doesn't even need to be the majority of the work, just enough that you'd rather those threads be able to handle other requests in the meantime. As long as the work that the thread is able to do while the asynchronous operation is ongoing accomplishes more than the cost of the overhead of making the code asynchronous, it's a net win. – Servy Feb 16 '15 at 15:40
  • @mason The trouble is traditional multi-tasking used something called a "thread", which is extremely heavy weight (several MB of stack memory needs to be allocated). If you needed to have a thread per request, then 1000 requests would take up GBs of memory without even doing anything. Further more, most of those 1000 threads would not be doing anything for most of the time. In actual fact, you can't actually spin up 1000s of threads. So async is faster as it handles the 1000 requests. – Aron Feb 16 '15 at 15:41
  • @mason Async works faster in this case because it allows you to reuse the threads, instead of each of the threads "blocking" (ie sitting on their hands waiting for the SQL server). A further upshot of this is that modern OS preemptive multi-tasking makes context switching between threads obscenely expensive (especially when you have 1000s of threads). – Aron Feb 16 '15 at 15:45
  • With 260 CPU-bound tasks (I didn't check if they *are* CPU bound in this example, just stating an example that shows keeping everything off the UI doesn't always make the UI responsive) that will affect the entire performance of the system--including the UI of the app. There's a demo I do of that very same situation in a talk I do about having only one CPU-bound operation per CPU core, and no more. Throwing more threads at a problem does not guarantee faster results. – Peter Ritchie Feb 16 '15 at 16:07
  • @PeterRitchie This question involves primarily IO bound work, not CPU bound work, and it's an ASP application not a desktop application, so it's not about a responsive UI at all. – Servy Feb 16 '15 at 16:09
  • Is it *really* IO bound? Or is it CPU bound with a pool thread waiting for IO to complete? Is there something that documents that OracleCommand is really IO bound? – Peter Ritchie Feb 16 '15 at 16:13
  • @PeterRitchie The underlying operation is a database query, which is a network transaction, so yes, it's inherently IO bound. The operation is IO bound whether its synchronous or asynchronous. Having a thread sit there waiting on the IO makes it still be IO bound. As to whether the methods marked as `Async` and returning `Task` objects are actually asynchronous, is another matter. – Servy Feb 16 '15 at 16:16
  • From an API point of view, just because it's a network transaction doesn't make it only IO bound. You can just as easily block a thread waiting for IO to complete (the IO operation itself occuring on another, IO, thread). For example, `DbCommand.ExecutReadAsync` is really just a wrapper around a synchronous call to `ExecuteReader` – Peter Ritchie Feb 16 '15 at 16:18
  • @PeterRitchie As I said, even a synchronous application that's mostly doing IO is an IO bound application. Whether something is IO/CPU bound is an entirely separate property from whether its asynchronous/synchronous. You can have an IO bound synchronous application, an CPU bound asynchronous application, etc. They're orthogonal properties of the program. – Servy Feb 16 '15 at 16:20
  • Miscommunication; a dedicated thread waiting for IO to complete isn't really only "IO bound", and technically isn't really CPU bound either because it's not pegging a CPU. But, non-overlapped asynchronous IO still has an effect on performance compared overlapped asynchronous IO. – Peter Ritchie Feb 16 '15 at 16:29
  • @PeterRitchie See [a definition of IO bound](http://en.wikipedia.org/wiki/I/O_bound). An IO bound application is one that is principally spent waiting for Input/Output operations. There is no requirement that it be asynchronous. A thread sitting there waiting on IO to finish is an IO bound operation *by definition*. – Servy Feb 16 '15 at 16:31
  • @Servy Thanks for your explanation, however I don't think I'm capable of fully understanding your response or the comments until I've matured a bit more as a programmer. My academic training for multithreaded coding was regrettably brief. Perhaps I'll read some books about async and multithreading in C# in the hopes I'll be able to better understand this in the future. Until then, I'll just go forward with fewer expectations about the performance gains I might expect from async. – mason Feb 16 '15 at 16:45
6

Turns out the Oracle Managed Driver is "fake async", which would partially explain why my async code is running slower.

Community
  • 1
  • 1
mason
  • 31,774
  • 10
  • 77
  • 121
  • Well, if you look at the OP’s code, you can see that the fake async isn’t even being used—the OP is calling `dt.Load(command.ExecuteReader())`. – binki Mar 19 '18 at 15:10
  • @binki I *wrote* the original code. Look at it again. I provided both async and non-async versions. – mason Mar 19 '18 at 15:21
  • Ah, sorry, I went looking for `GetDataTableAsync()` and then scrolled past. It seems to me that this should be the accepted answer. – binki Mar 19 '18 at 15:27
  • They published a new version 18.3 (last one was 12.2) 7 days ago. https://www.nuget.org/packages/Oracle.ManagedDataAccess/18.3.0 maybe this new one really is async. Can anyone confirm that? – lmcarreiro Sep 05 '18 at 12:19