
The program is written in C#. It pulls data from the Event Hub one item at a time, sequentially, and makes an entry in a SQL DB for each item.

To improve its performance, I made the SQL insert async.

On my local machine (8 GB RAM, 4 cores), the async version of the code performs faster than the sync version.

Whereas on the dev server/environment (single core, 1.75 GB), the async version performs slower than the sync version.

I scaled up the dev server/environment (2 cores, 3.5 GB). The async version improved, but it still performs slower than the sync version.

My understanding is that, irrespective of the number of CPU cores and the RAM size, the async version should perform better than the sync version.

Your thoughts on this?

Code snippet

// The ManageGeoSpetialData method will be called for every event pulled from the Event Hub.

public class EventConsumer : IEventConsumer
{
    private static readonly ILog4NetService Log = new Log4NetService(MethodBase.GetCurrentMethod().DeclaringType);
    private readonly IConsumerFactory _consumerFactory;
    private readonly IPublisherFactory _publisherFactory;
    private readonly IDataAccessLayer _dataAccess;
    private IPublisher _ehPublisher;

    public EventConsumer(IConsumerFactory consumerFactory, IPublisherFactory publisherFactory, IDataAccessLayer dataAccess)
    {
        _consumerFactory = consumerFactory;
        _publisherFactory = publisherFactory;
        _dataAccess = dataAccess;
    }

    public void Process()
    {
        Log.Info("CheckPointLogic Process Called ");
        try
        {
            ManageGeoSpetialLogic("Eventhub","Eventhub");
        }
        catch (Exception ex)
        {
            Log.Error("Error in CheckPointLogic Process Method : " + ex.Message);
        }
    }

    private void ManageGeoSpetialLogic(string consumerName, string publisherName)
    {
        Log.Info("Manage CheckPointLogic Called with Consumer : " + consumerName);
        _ehPublisher = _publisherFactory.Get(publisherName);
        var consumer = _consumerFactory.Get(consumerName);
        consumer.Consume(ManageGeoSpetialData);
        Log.Info("Consumer Method called ");
    }

    public async void ManageGeoSpetialData(object data)
    {
            try
            {
                _ehPublisher = _ehPublisher ?? _publisherFactory.Get(Apps.Terra.Messaging.Publisher.Constants.PublisherTypes.Eventhub);
                GeoBoundaryEvent geoBoundaryInfo = null;
                object transactionID = new object();
                if (data is EventData eventInfo)
                {
                    var value = Encoding.UTF8.GetString(eventInfo.Body.ToArray());
                    geoBoundaryInfo = JsonConvert.DeserializeObject<GeoBoundaryEvent>(value);
                    geoBoundaryInfo.SetEventType();
                    eventInfo.Properties.TryGetValue(EventProperties.TransactionIdProperty, out transactionID);
                }

                if (geoBoundaryInfo != null)
                {
                    geoBoundaryInfo.AssetLocationTimestamp = DateTime.ParseExact(geoBoundaryInfo.AssetLocationTimestamp.ToString("yyyy-MM-dd HH:mm:ss.fff"), "yyyy-MM-dd HH:mm:ss.fff", null);
                    geoBoundaryInfo.AssetLocationTimestamp = DateTime.SpecifyKind(geoBoundaryInfo.AssetLocationTimestamp, DateTimeKind.Utc);

                    // geoBoundaryInfo.AssetGeofenceID = _dataAccess.AddGeofenceEventInfo(geoBoundaryInfo); //Db Call

                    geoBoundaryInfo.AssetGeofenceID = await _dataAccess.AddGeofenceEventInfoAsync(geoBoundaryInfo);

                    EventData eventData = new EventData(Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(geoBoundaryInfo)));
                    _ehPublisher.Publish(eventData);   //EH publish
                }
            }
            catch (Exception ex)
            {
                Log.Error($"Error in ManageGeoSpetialData {ex.Message}");
            }
    }
}


//Business layer method
    public async Task<long> AddGeofenceEventInfoAsync(GeoBoundaryEvent geoBoundaryEvent)
    {
        //Need to change query for update
        var query = @"INSERT INTO AssetGeofence(AssetType,AssetDSN,AssetLocationDatetime,AlertDateTime,GeoBoundaryAssetType,GeoBoundaryAssetDSN,fk_EventTypeID,GeoFenceName,GeoFenceID,IsActive) Output Inserted.AssetGeofenceID values 
                     (@AssetType,@AssetDeviceSerialNumber,@AssetLocationTimestamp,@AlertTimestamp,@GeoBoundaryAssetType,@GeoBoundaryAssetDeviceSerialNumber,@fk_EventTypeID,@GeoFenceName,@GeoFenceId,1);";
        var result = await _dbConnection.ExecuteQueryAsync<long>(query, geoBoundaryEvent);
        return result[0];
    }

//Data layer method
    public async Task<List<T>> ExecuteQueryAsync<T>(string sql, object param)
    {
        using (var connection = GetConnection())
        {
            connection.Open();
            var result = (await connection.QueryAsync<T>(sql, param, commandTimeout: 100)).ToList();
            connection.Close();
            return result;
        }
    }
  • With no code we can guess all day; adding code helps us understand what you are doing. – Seabizkit Nov 30 '18 at 07:40
  • Are the DBs the same, with the same volume? – Seabizkit Nov 30 '18 at 07:41
  • @Seabizkit Both the local machine and the dev server use the same DB. – venkatesh k Nov 30 '18 at 07:46
  • Async isn't meant to go *faster*, it's meant to avoid blocking and thread waste, thus improving scalability. It won't improve the performance of bad access strategies or bad queries. – Panagiotis Kanavos Nov 30 '18 at 07:46
  • Inserting records one by one will *always* be slow, blocking or not. That's because you pay the network roundtrip, logging and blocking for every single row. Use SqlBulkCopy if you want to insert a lot of records. If you can't, batch multiple insert statements together or pass multiple values in one INSERT statement with `INSERT ..VALUES (...),(...)`. – Panagiotis Kanavos Nov 30 '18 at 07:47
  • If you want a more specific answer, post your code. – Panagiotis Kanavos Nov 30 '18 at 07:48
  • @PanagiotisKanavos I understand async is meant to avoid blocking the main thread when an IO call is made. Processing 1000 records one by one with the main thread and sync DB calls is faster compared to async DB calls. That's what I don't understand. I have the option of bulk insert in mind, but I am still trying to understand why async is not performing better when processing 1000 records. – venkatesh k Nov 30 '18 at 09:12
  • @venkateshk because it's not meant to go faster. Real async operations *release* the current thread while an asynchronous operation runs. When that operation completes, the OS itself notifies the application and the application resumes execution. `async/await` hide the callbacks and restore the execution environment to what it was before the async operation started. All that takes additional time. If the query returns results, retrieval is asynchronous as well – Panagiotis Kanavos Nov 30 '18 at 09:22
  • Why would you assume it would be faster? The async version still only ever has one thing going on if it has anything going on, same as the sync version. All you're getting is overhead from context switching. It bears repeating: async should never be expected to be faster than sync. It can only ever be *just as fast* in the best case. It only starts to become faster when you can leverage parallel processing, and then it typically scales better than spinning up threads for the sync versions. (Also, [don't use `async void`](https://haacked.com/archive/2014/11/11/async-void-methods/).) – Jeroen Mostert Nov 30 '18 at 09:24
  • I am not expecting the processing time of each and every record to be faster. When an async DB call is made, the main thread is free to pull the next record from the Event Hub and process it before the DB call completes its work. So, applied across 1000 records, the cumulative processing time should be lower. – venkatesh k Nov 30 '18 at 09:45
  • @venkateshk No, that's not what asynchronous means. It means that the thread is *released* to do other work (e.g. respond to UI messages) until the response arrives. When that response arrives, the thread continues with the next operation in that code block. If that is another insert, the process repeats itself. – Panagiotis Kanavos Nov 30 '18 at 09:48
  • @venkateshk *One* connection can only execute *one* command at a time. Don't try to "speed up" things by using multiple connections and commands though; you'll actually increase delays as multiple connections contend for the *same* network bandwidth and server IO, and take locks that block each other. – Panagiotis Kanavos Nov 30 '18 at 09:50
  • @venkateshk even if what you said was true, sending *one* INSERT command with 1000 values would *still* be faster – Panagiotis Kanavos Nov 30 '18 at 09:51
  • @PanagiotisKanavos I simply couldn't get why a program with a sync DB call performs better than one with an async DB call. I just made a sample program that makes 100 DB calls to the same DB, same table: 100 iterations async 20373 ms, 100 iterations sync 28110 ms. Is there any link or blog which explains your concept in more detail? – venkatesh k Nov 30 '18 at 13:01
  • @venkateshk I already explained why. Read the comments again. Stop assuming `async` means the next command will run immediately. It does *NOT*. Any blog you read will say the same things. And if you care about performance, stop issuing individual INSERTs. – Panagiotis Kanavos Nov 30 '18 at 13:04
  • @PanagiotisKanavos I will go through the blogs again. Thank you for your time – venkatesh k Nov 30 '18 at 13:25
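
Following up on the SqlBulkCopy / multi-row INSERT suggestion in the comments above: below is a minimal, hypothetical sketch (not the question's code) of buffering deserialized events and flushing them to SQL Server in one round trip with SqlBulkCopy. The GeofenceBulkWriter class, the BulkInsertGeofenceEventsAsync method, and the batching itself are assumptions for illustration; the GeoBoundaryEvent property names are taken from the query parameters shown in the question, and several columns are omitted for brevity. Note that, unlike the Output Inserted.AssetGeofenceID clause in the original query, SqlBulkCopy does not return the generated identity values, so this only fits if AssetGeofenceID is not needed per event before publishing.

// Hypothetical batching sketch (not from the question): buffer deserialized
// events and write the whole batch to SQL Server with SqlBulkCopy.
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.Threading.Tasks;

public static class GeofenceBulkWriter
{
    public static async Task BulkInsertGeofenceEventsAsync(string connectionString, IReadOnlyList<GeoBoundaryEvent> batch)
    {
        // In-memory table whose columns mirror AssetGeofence (remaining columns omitted for brevity).
        var table = new DataTable();
        table.Columns.Add("AssetType", typeof(string));
        table.Columns.Add("AssetDSN", typeof(string));
        table.Columns.Add("AssetLocationDatetime", typeof(DateTime));
        table.Columns.Add("GeoFenceName", typeof(string));
        table.Columns.Add("IsActive", typeof(bool));

        foreach (var e in batch)
        {
            table.Rows.Add(e.AssetType, e.AssetDeviceSerialNumber, e.AssetLocationTimestamp, e.GeoFenceName, true);
        }

        using (var connection = new SqlConnection(connectionString))
        {
            await connection.OpenAsync();
            using (var bulk = new SqlBulkCopy(connection))
            {
                bulk.DestinationTableName = "AssetGeofence";

                // Map source columns to destination columns by name.
                foreach (DataColumn column in table.Columns)
                {
                    bulk.ColumnMappings.Add(column.ColumnName, column.ColumnName);
                }

                // One network round trip for the whole batch instead of one INSERT per event.
                await bulk.WriteToServerAsync(table);
            }
        }
    }
}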

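On the `async void` point raised in the comments: below is a minimal sketch of the same handler with an awaitable signature, reusing the fields and types from the EventConsumer class in the question. It assumes the consumer abstraction could accept a Func<object, Task>, which is not shown in the question; the try/catch, the transaction ID lookup, and the timestamp normalization are omitted for brevity. Even with this change, each insert is still awaited before the next event is handled, so it does not by itself make sequential processing faster; it mainly ensures exceptions and completion of the handler can be observed.

// Hypothetical variant of ManageGeoSpetialData that returns Task instead of void.
// Assumes consumer.Consume could take a Func<object, Task>; the IConsumer
// interface itself is not shown in the question.
public async Task ManageGeoSpetialDataAsync(object data)
{
    if (!(data is EventData eventInfo))
    {
        return;
    }

    var value = Encoding.UTF8.GetString(eventInfo.Body.ToArray());
    var geoBoundaryInfo = JsonConvert.DeserializeObject<GeoBoundaryEvent>(value);
    geoBoundaryInfo.SetEventType();

    // The thread is released while the INSERT runs, but the next event is not
    // processed until this await completes, so there is no overlap between events.
    geoBoundaryInfo.AssetGeofenceID = await _dataAccess.AddGeofenceEventInfoAsync(geoBoundaryInfo);

    var eventData = new EventData(Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(geoBoundaryInfo)));
    _ehPublisher.Publish(eventData);   // EH publish, as in the original handler
}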