You can give this function a try. As I stated in the comments, this will be much faster and more memory-friendly if you use the .NET APIs instead of treating the CSV as objects.
This function uses the StreamReader and StreamWriter classes to read and write the file line by line.
Usage examples, as well as its Merge-Csv counterpart, are available through the PS Gallery and in the official GitHub repo.
using namespace System.IO
using namespace System.Text
using namespace System.Management.Automation
using namespace System.Management.Automation.Language
using namespace System.Collections
using namespace System.Collections.Generic
# All Credits to jborean93 on the EncodingTransformation Class
# Source: https://gist.github.com/jborean93/50a517a8105338b28256ff0ea27ab2c8#file-get-extendedattribute-ps1
# Argument transformation attribute that lets an [Encoding]-typed parameter
# accept an Encoding instance, a well-known encoding name, any name understood
# by [Encoding]::GetEncoding, or an integer code page.
class EncodingTransformation : ArgumentTransformationAttribute {
    # Converts $InputData to an Encoding object.
    # Throws ArgumentTransformationMetadataException when the input is not an
    # Encoding, a string or an int.
    [object] Transform([EngineIntrinsics] $EngineIntrinsics, [object] $InputData) {
        $outputData = switch ($InputData) {
            # Already an Encoding: pass it through untouched.
            { $_ -is [Encoding] } { $_ }

            # Encoding given by name (PowerShell switch matches strings case-insensitively).
            { $_ -is [string] } {
                switch ($_) {
                    ASCII { [ASCIIEncoding]::new() }
                    BigEndianUnicode { [UnicodeEncoding]::new($true, $true) }
                    BigEndianUTF32 { [UTF32Encoding]::new($true, $true) }
                    ANSI {
                        # Ask Windows for the active ANSI code page through Kernel32.
                        $raw = Add-Type -Namespace Encoding -Name Native -PassThru -MemberDefinition '
                            [DllImport("Kernel32.dll")]
                            public static extern Int32 GetACP();
                        '
                        [Encoding]::GetEncoding($raw::GetACP())
                    }
                    OEM { [Console]::OutputEncoding }
                    Unicode { [UnicodeEncoding]::new() }
                    # 'utf7' is offered by the encoding completer, so map it explicitly
                    # instead of relying on a name lookup through GetEncoding.
                    UTF7 { [UTF7Encoding]::new() }
                    UTF8 { [UTF8Encoding]::new($false) }
                    UTF8BOM { [UTF8Encoding]::new($true) }
                    UTF8NoBOM { [UTF8Encoding]::new($false) }
                    UTF32 { [UTF32Encoding]::new() }
                    # Anything else is handed to .NET as an encoding name.
                    default { [Encoding]::GetEncoding($_) }
                }
            }

            # Encoding given as a code page number.
            { $_ -is [int] } { [Encoding]::GetEncoding($_) }

            default {
                throw [ArgumentTransformationMetadataException]::new(
                    "Could not convert input '$_' to a valid Encoding object."
                )
            }
        }

        return $outputData
    }
}
# Argument completer that suggests the well-known encoding names listed in
# $EncodingSet for a parameter.
class EncodingCompleter : IArgumentCompleter {
    # Names offered as completions.
    [string[]] $EncodingSet = @(
        'ascii'
        'bigendianutf32'
        'unicode'
        'utf8'
        'utf8NoBOM'
        'bigendianunicode'
        'oem'
        'utf7'
        'utf8BOM'
        'utf32'
        'ansi'
    )

    # Returns one CompletionResult per encoding name that starts with
    # $wordToComplete. The other arguments are part of the IArgumentCompleter
    # signature and are not used here.
    [IEnumerable[CompletionResult]] CompleteArgument (
        [string] $commandName,
        [string] $parameterName,
        [string] $wordToComplete,
        [CommandAst] $commandAst,
        [IDictionary] $fakeBoundParameters
    ) {
        # Always hand back a list (possibly empty) rather than $null.
        $matches = [List[CompletionResult]]::new()

        foreach ($enc in $this.EncodingSet) {
            # Case-insensitive prefix match, so 'UTF' completes to 'utf8', 'utf32', ...
            if ($enc.StartsWith($wordToComplete, [StringComparison]::OrdinalIgnoreCase)) {
                $matches.Add([CompletionResult]::new($enc))
            }
        }

        return $matches
    }
}
function Split-Csv {
    <#
    .SYNOPSIS
    Splits a CSV file into several smaller files, repeating the header line in each one.

    .DESCRIPTION
    Reads the source file line by line with a StreamReader and writes the lines to
    successive chunk files with a StreamWriter. The first line of the source is treated
    as the header and is written at the top of every chunk. Chunk files are named
    '<BaseName> - Part <Index><Extension>' and are created in -DestinationFolder,
    which is created when it does not exist. A new chunk is started whenever the
    current chunk file has reached the computed size threshold.

    .PARAMETER Path
    Path of the file to split. Accepts pipeline input, also by the 'FullName' property.

    .PARAMETER DestinationFolder
    Folder where the chunk files are written.

    .PARAMETER Size
    'BySize' parameter set: value the per-chunk size threshold is derived from. Defaults to 1kb.

    .PARAMETER Chunks
    'ByChunks' parameter set (default): value the per-chunk size threshold is derived from. Defaults to 3.

    .PARAMETER Encoding
    Encoding used to read the source (byte order mark detection is enabled) and to write
    the chunks. Defaults to 'utf8'.

    .PARAMETER PassThru
    Outputs a FileInfo object for each chunk file once it has been closed.
    #>
    [CmdletBinding(DefaultParameterSetName = 'ByChunks')]
    param(
        [Parameter(Position = 0, Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName)]
        [alias('FullName')]
        [string] $Path,

        [Parameter(Mandatory)]
        [string] $DestinationFolder,

        [Parameter(ParameterSetName = 'BySize')]
        [int64] $Size = 1kb,

        [Parameter(ParameterSetName = 'ByChunks')]
        [int32] $Chunks = 3,

        [Parameter()]
        [EncodingTransformation()]
        [ArgumentCompleter([EncodingCompleter])]
        [Encoding] $Encoding = 'utf8',

        [Parameter()]
        [switch] $PassThru
    )

    begin {
        $Destination = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($DestinationFolder)

        # Small factory that knows how to name and open the next chunk file.
        class ChunkWriter {
            [FileInfo] $Source
            [string] $Destination
            [string] $Headers
            [string] $Format
            [Encoding] $Encoding

            # Creates (or overwrites) the chunk file for $Index, writes the header
            # line to it and returns the open writer. The caller disposes it.
            [StreamWriter] GetNewWriter([int32] $Index) {
                $name = [string]::Format(
                    '{0} - Part {1}{2}',
                    $this.Source.BaseName,
                    $Index.ToString($this.Format),
                    $this.Source.Extension
                )
                $newChunk = Join-Path $this.Destination -ChildPath $name
                $writer = [StreamWriter]::new($newChunk, $false, $this.Encoding)
                # The split loop inspects BaseStream.Length after every line, so
                # each write has to reach the stream immediately.
                $writer.AutoFlush = $true
                $writer.WriteLine($this.Headers)
                return $writer
            }
        }
    }

    process {
        # Reset per-item state so that a failure on this item never disposes or
        # re-emits the reader/writer left over from a previous pipeline item.
        $reader = $null
        $writer = $null

        try {
            # Keep the [string] parameter untouched; work with a separate FileInfo.
            $sourceFile = [FileInfo] $PSCmdlet.GetUnresolvedProviderPathFromPSPath($Path)
            $null = [Directory]::CreateDirectory($Destination)
            $reader = [StreamReader]::new($sourceFile.FullName, $Encoding, $true)
            $headers = $reader.ReadLine()
            $Index = 0

            # $chunkSize is the file length at which the current chunk is closed;
            # $format zero-pads the part number in the chunk file name.
            if ($PSCmdlet.ParameterSetName -eq 'ByChunks') {
                $chunkSize = ($sourceFile.Length - $headers.Length) / $Chunks + ($headers.Length * $Chunks)
                $format = 'D{0}' -f $Chunks.ToString().Length
            }
            else {
                $chunkSize = $Size - $headers.Length
                $format = 'D{0}' -f [math]::Ceiling($sourceFile.Length / $Size).ToString().Length
            }

            $chunkWriter = [ChunkWriter]@{
                Source      = $sourceFile
                Destination = $Destination
                Headers     = $headers
                Format      = $format
                Encoding    = $Encoding
            }

            $writer = $chunkWriter.GetNewWriter($Index++)

            while (-not $reader.EndOfStream) {
                if ($writer.BaseStream.Length -ge $chunkSize) {
                    # Capture the path before disposing: the underlying stream
                    # should not be relied upon once the writer is closed.
                    $chunkPath = $writer.BaseStream.Name
                    $writer.Dispose()
                    $writer = $null

                    if ($PassThru.IsPresent) {
                        $chunkPath -as [FileInfo]
                    }

                    $writer = $chunkWriter.GetNewWriter($Index++)
                }

                $writer.WriteLine($reader.ReadLine())
            }
        }
        catch {
            $PSCmdlet.ThrowTerminatingError($_)
        }
        finally {
            # Close the last chunk (if one is open) and emit it when requested.
            if ($null -ne $writer) {
                $chunkPath = $writer.BaseStream.Name
                $writer.Dispose()

                if ($PassThru.IsPresent) {
                    $chunkPath -as [FileInfo]
                }
            }

            if ($null -ne $reader) {
                $reader.Dispose()
            }
        }
    }
}