How can I read a CSV file from an SFTP server and parse its content with CsvHelper without saving the CSV locally?
Is this possible, or do we have to save the file locally, parse it, and then delete it?
I am using SSH.NET and CsvHelper.
How can I read a CSV file from an SFTP server and parse its content with CsvHelper without saving the CSV locally?
Is this possible, or do we have to save the file locally, parse it, and then delete it?
I am using SSH.NET and CsvHelper.
Yes, it is possible: instead of downloading the file, open it as a stream on the SFTP server and hand that stream to CsvHelper:
/// <summary>
/// Starts one <c>ParseRemoteFileAsync</c> call per remote path and waits until every one of them
/// has finished.
/// </summary>
/// <returns>A task that completes once all files have been parsed.</returns>
public async Task ProcessRemoteFilesAsync()
{
    var filePaths = new List<string>();
    // initializing filePaths ..

    // Placeholder connection settings shared by every parse call.
    var credentials = new Credentials("host", "username", "password");

    // Kick off all parse operations up front, then await them together.
    var pending = new List<Task<FileContent>>();
    foreach (var remotePath in filePaths)
    {
        pending.Add(ParseRemoteFileAsync(credentials, remotePath));
    }

    var results = await Task.WhenAll(pending).ConfigureAwait(false);
    // traverse through results..
}
/// <summary>
/// Opens <paramref name="filePath"/> on the SFTP server as a stream and feeds that stream to
/// CsvHelper, so the CSV file is never written to local disk.
/// </summary>
/// <param name="credentials">Host, user name and password used to open a dedicated SFTP connection.</param>
/// <param name="filePath">Path of the CSV file on the remote server.</param>
/// <returns>A task whose result is the parsed content of the remote file.</returns>
/// <remarks>
/// The SSH.NET calls used here (<c>Connect</c>, <c>OpenRead</c>) block the calling thread, so the
/// work is pushed to the thread pool. Without that, this method would run to completion before
/// returning its task, and a caller fanning out with <c>Task.WhenAll</c> would in fact process the
/// files one after another.
/// </remarks>
public async Task<FileContent> ParseRemoteFileAsync(Credentials credentials, string filePath)
{
    return await Task.Run(() =>
    {
        // Each call owns its connection, so concurrent calls never share an SftpClient.
        using (var sftp = new SftpClient(credentials.host, credentials.username, credentials.password))
        {
            sftp.Connect();
            try
            {
                using (var remoteFileStream = sftp.OpenRead(filePath))
                using (var reader = new StreamReader(remoteFileStream))
                using (var csv = new CsvReader(reader))
                {
                    // Populate this from the CSV rows; the original snippet never returned a value,
                    // which does not compile for a Task<FileContent> method.
                    var content = new FileContent();

                    /*
                    // Example of CSV parsing:
                    var records = new List<Foo>();
                    csv.Read();
                    csv.ReadHeader();
                    while (csv.Read())
                    {
                        var record = new Foo
                        {
                            Id = csv.GetField<int>("Id"),
                            Name = csv.GetField("Name")
                        };
                        records.Add(record);
                    }
                    */

                    return content;
                }
            }
            finally
            {
                // Close the session even when opening or parsing the file fails.
                sftp.Disconnect();
            }
        }
    }).ConfigureAwait(false);
}
Modified version that uses a pool of SftpClient instances
See "C# Object Pooling Pattern implementation".
The pool implementation is borrowed from "How to: Create an Object Pool by Using a ConcurrentBag":
/// <summary>
/// A minimal pool of reusable <typeparamref name="T"/> instances stored in a
/// <see cref="ConcurrentBag{T}"/>. Implementation borrowed from [How to: Create an Object Pool by Using a
/// ConcurrentBag](https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/how-to-create-an-object-pool).
/// </summary>
/// <typeparam name="T">Type of the pooled objects; they are disposed together with the pool.</typeparam>
public class ObjectPool<T> : IDisposable
    where T : IDisposable
{
    private readonly Func<T> _objectGenerator;
    private readonly ConcurrentBag<T> _objects;

    // Set once by Dispose; volatile so that PutObject on another thread observes it.
    private volatile bool _disposed;

    /// <summary>Creates an empty pool.</summary>
    /// <param name="objectGenerator">Factory invoked by <see cref="GetObject"/> whenever the pool is empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objectGenerator"/> is <c>null</c>.</exception>
    public ObjectPool(Func<T> objectGenerator)
    {
        _objectGenerator = objectGenerator ?? throw new ArgumentNullException(nameof(objectGenerator));
        _objects = new ConcurrentBag<T>();
    }

    /// <summary>
    /// Disposes every object currently held by the pool. Objects handed back through
    /// <see cref="PutObject"/> afterwards are disposed immediately instead of being stored.
    /// </summary>
    public void Dispose()
    {
        _disposed = true;
        DisposePooledItems();
    }

    /// <summary>Takes an object out of the pool, creating a new one when the pool is empty.</summary>
    /// <returns>A pooled object, or a freshly generated one.</returns>
    public T GetObject()
    {
        return _objects.TryTake(out var item) ? item : _objectGenerator();
    }

    /// <summary>Hands an object back to the pool so a later <see cref="GetObject"/> can reuse it.</summary>
    /// <param name="item">The object to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="item"/> is <c>null</c>.</exception>
    public void PutObject(T item)
    {
        // A null entry would later surface as a NullReferenceException in Dispose or in a caller.
        if (item == null)
        {
            throw new ArgumentNullException(nameof(item));
        }

        if (_disposed)
        {
            // Nothing will ever drain the bag again, so storing the item would leak it.
            item.Dispose();
            return;
        }

        _objects.Add(item);

        // Dispose may have run between the check above and the Add; drain again in that case.
        if (_disposed)
        {
            DisposePooledItems();
        }
    }

    // Removes and disposes everything that is currently stored in the bag.
    private void DisposePooledItems()
    {
        while (_objects.TryTake(out var item))
        {
            item.Dispose();
        }
    }
}
The simplest pool-based implementation (it does not deal with exception handling or retry policies):
/// <summary>
/// SFTP helper that borrows connected <c>SftpClient</c> instances from an <c>ObjectPool</c>
/// instead of opening a new connection for every operation.
/// </summary>
/// <remarks>
/// Dispose this object when finished: doing so disposes the pool and, with it, every client the
/// pool still holds.
/// </remarks>
internal class SftpclientTest : IDisposable
{
    private readonly ObjectPool<SftpClient> _objectPool;

    /// <summary>Creates the helper; connections are opened lazily when the pool runs empty.</summary>
    /// <param name="credentials">Host, user name and password used for every pooled connection.</param>
    public SftpclientTest(Credentials credentials)
    {
        _objectPool = new ObjectPool<SftpClient>(() =>
        {
            var client = new SftpClient(credentials.host, credentials.username, credentials.password);
            try
            {
                client.Connect();
            }
            catch
            {
                // A client that failed to connect is never handed out, so release it here.
                client.Dispose();
                throw;
            }

            return client;
        });
    }

    /// <summary>Disposes the pool together with the clients it currently holds.</summary>
    public void Dispose()
    {
        _objectPool.Dispose();
    }

    /// <summary>Placeholder for a directory listing performed with a pooled client.</summary>
    public void GetDirectoryList()
    {
        var client = _objectPool.GetObject();
        try
        {
            // client.ListDirectory() ..
        }
        finally
        {
            ReleaseClient(client);
        }
    }

    /// <summary>Parses every remote file and waits until all parse operations have finished.</summary>
    /// <returns>A task that completes once all files have been parsed.</returns>
    public async Task ProcessRemoteFilesAsync()
    {
        var filePaths = new List<string>();
        // initializing filePaths ..
        var tasks = filePaths
            .Select(f => ParseRemoteFileAsync(f))
            .ToArray();
        var results = await Task.WhenAll(tasks).ConfigureAwait(false);
        // traverse through results..
    }

    /// <summary>
    /// Streams <paramref name="filePath"/> from the SFTP server into CsvHelper using a pooled client.
    /// </summary>
    /// <param name="filePath">Path of the CSV file on the remote server.</param>
    /// <returns>A task whose result is the parsed content of the remote file.</returns>
    /// <remarks>
    /// The blocking SSH.NET calls run on the thread pool so that several files can be in flight at
    /// once, each on its own pooled client, and so that failures are reported through the returned
    /// task rather than thrown synchronously.
    /// </remarks>
    public Task<FileContent> ParseRemoteFileAsync(string filePath)
    {
        return Task.Run(() =>
        {
            var client = _objectPool.GetObject();
            try
            {
                using (var remoteFileStream = client.OpenRead(filePath))
                using (var reader = new StreamReader(remoteFileStream))
                using (var csv = new CsvReader(reader))
                {
                    // ..
                    return new FileContent();
                }
            }
            finally
            {
                ReleaseClient(client);
            }
        });
    }

    // Returns a still-connected client to the pool; a dead one is disposed so it does not leak.
    private void ReleaseClient(SftpClient client)
    {
        if (client.IsConnected)
        {
            _objectPool.PutObject(client);
        }
        else
        {
            client.Dispose();
        }
    }
}