11

I have a byte array:

byte[] bytes;  // many elements

I need to divide it into a sequence of byte arrays of X elements each. For example, X = 4.

If bytes.Length is not a multiple of X, then pad the last array with zeros so that the Length of every resulting array is exactly X.

Linq available.

PS: my attempts

/// <summary>
/// Splits a sample list of bytes into chunks of four elements, padding the
/// final chunk with zeros so that every chunk has the same length.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int chunkSize = 4;
    List<byte> bytes = new List<byte>() { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
    List<List<byte>> chunks = new List<List<byte>>();

    // Advance by one whole chunk per iteration and stop at the end of the data.
    for (int i = 0; i < bytes.Count; i += chunkSize)
    {
        // Only the final chunk can have fewer than chunkSize elements left.
        int remaining = bytes.Count - i;
        List<byte> b = bytes.GetRange(i, remaining < chunkSize ? remaining : chunkSize);

        // Pad a short final chunk with zeros up to the required length.
        while (b.Count < chunkSize)
        {
            b.Add(0);
        }

        chunks.Add(b);
    }

    Console.ReadKey();
}
stop-cran
  • 4,229
  • 2
  • 30
  • 47
nik
  • 875
  • 2
  • 12
  • 21

13 Answers13

31

This is quite cute:

static class ChunkExtension
{
    /// <summary>
    /// Lazily splits a sequence into arrays of exactly <paramref name="size"/> elements,
    /// reading the source only once. A short final array keeps default(T) in its
    /// unfilled trailing slots.
    /// </summary>
    /// <param name="source">The sequence to split.</param>
    /// <param name="size">Length of every produced array; must be at least 1.</param>
    /// <returns>One array per group of <paramref name="size"/> source elements.</returns>
    public static IEnumerable<T[]> Chunkify<T>(
        this IEnumerable<T> source, int size)
    {
        if (source == null) throw new ArgumentNullException("source");
        if (size < 1) throw new ArgumentOutOfRangeException("size");
        using (var iter = source.GetEnumerator())
        {
            while (iter.MoveNext())
            {
                var chunk = new T[size];
                int filled = 0;
                // The enumerator is already positioned on the chunk's first element,
                // so store first and advance afterwards.
                do
                {
                    chunk[filled] = iter.Current;
                    filled++;
                }
                while (filled < size && iter.MoveNext());
                yield return chunk;
            }
        }
    }
}
static class Program
{
    /// <summary>
    /// Splits a sample list into chunks of four bytes and prints each chunk on
    /// its own line as space-separated two-digit hexadecimal values.
    /// </summary>
    static void Main(string[] args)
    {
        var bytes = new List<byte> { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

        foreach (byte[] chunk in bytes.Chunkify(4))
        {
            for (int i = 0; i < chunk.Length; i++)
            {
                Console.Write(chunk[i].ToString("x2") + " ");
            }
            Console.WriteLine();
        }
    }
}
Marc Gravell
  • 1,026,079
  • 266
  • 2,566
  • 2,900
  • 1
    adorable :)<-not enough characters – Chris McCall May 25 '11 at 22:47
  • Nice one. However, beware of uninitialized array elements when the number of elements in "source" is not a multiple of "size". – mkoertgen Oct 09 '13 at 16:26
  • 2
    Simple fix: declare "int i" outside the loop and use Array.Resize(...) if "i" is less than "size" afterwards. – mkoertgen Oct 10 '13 at 13:10
  • I'd convert that ugly inner `for` loop to `do .. while`. var chunk = new T[size]; var i = 0; do { chunk[i] = iter.Current; i++; } while (i < size && iter.MoveNext()); yield return chunk; – hIpPy Aug 16 '14 at 04:42
  • is there any benefit of iterating manually (like in the Chunikfy implementation) through the list over using foreach? – wischi Jan 22 '15 at 00:22
  • @wischi yes: code convenience; try re-writing the above with a `foreach`: it becomes a mess because of the two related loops (the data vs the array); for a *really* good example: `Zip` is a good one. – Marc Gravell Jan 22 '15 at 08:00
7

The voted answer works if you always get source.Length % size != 0, though it is too verbose. Here is a nicer implementation:

/// <summary>
/// Lazily splits a sequence into arrays of <paramref name="size"/> elements.
/// The final array is trimmed to the number of remaining elements rather than padded.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="size">Maximum length of each produced array; must be at least 1.</param>
/// <returns>A distinct array for every chunk.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<T[]> AsChunks<T>(IEnumerable<T> source, int size)
{
    if (source == null) throw new ArgumentNullException("source");
    if (size < 1) throw new ArgumentOutOfRangeException("size");

    var chunk = new T[size];
    var i = 0;
    foreach (var e in source)
    {
        chunk[i++] = e;
        if (i == size)
        {
            yield return chunk;
            // The caller now owns the yielded array; start a fresh buffer so that
            // later elements do not overwrite a chunk the caller may have kept.
            chunk = new T[size];
            i = 0;
        }
    }
    if (i > 0) // Anything left?
    {
        Array.Resize(ref chunk, i);
        yield return chunk;
    }
}

// Demo: prints "Hello World!" in pieces of at most five characters, one piece per line.
void Main()
{
    IEnumerable<char[]> pieces = AsChunks("Hello World!", 5);
    foreach (char[] piece in pieces)
    {
        string line = new string(piece);
        Console.WriteLine(line);
    }
}

Producing:

  1. Hello
  2. Worl
  3. d!
Gus_sig
  • 149
  • 2
  • 2
3

This does it nicely:

    /// <summary>
    /// Lazily splits a sequence into batches of at most <paramref name="batchsize"/> items,
    /// enumerating the source exactly once. The final batch may be shorter.
    /// </summary>
    /// <param name="items">The sequence to split.</param>
    /// <param name="batchsize">Maximum number of items per batch; must be at least 1.</param>
    /// <returns>The batches, each already buffered in memory.</returns>
    public static IEnumerable<IEnumerable<T>> GetBatches<T>(this IEnumerable<T> items, int batchsize) {
        if (items == null) throw new System.ArgumentNullException("items");
        // A non-positive size could never consume the source and would loop forever.
        if (batchsize < 1) throw new System.ArgumentOutOfRangeException("batchsize");

        var batch = new List<T>(batchsize);
        foreach (var item in items) {
            batch.Add(item);
            if (batch.Count == batchsize) {
                yield return batch;
                // Hand out a new list each time so earlier batches stay intact.
                batch = new List<T>(batchsize);
            }
        }
        if (batch.Count > 0) {
            yield return batch;
        }
    }
colinbashbash
  • 996
  • 2
  • 9
  • 19
3

How about this:

// Sample input: eleven bytes, which is not a multiple of the chunk size.
List<byte> bytes = new List<byte> { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

// Split into chunks of four elements.
IEnumerable<IEnumerable<byte>> result = Chunkify(bytes, 4);

/// <summary>
/// Lazily splits a sequence into chunks of exactly <paramref name="chunkSize"/> elements,
/// enumerating the source only once. A short final chunk is padded with default(T).
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="chunkSize">Length of every chunk; must be at least 1.</param>
/// <returns>The chunks, each already buffered in memory.</returns>
IEnumerable<IEnumerable<T>> Chunkify<T>(IEnumerable<T> source, int chunkSize)
{
    if (source == null) throw new System.ArgumentNullException("source");
    if (chunkSize < 1) throw new System.ArgumentOutOfRangeException("chunkSize");

    var chunk = new List<T>(chunkSize);
    foreach (var item in source)
    {
        chunk.Add(item);
        if (chunk.Count == chunkSize)
        {
            yield return chunk;
            // Start a new list so the chunk already handed out is not modified.
            chunk = new List<T>(chunkSize);
        }
    }

    if (chunk.Count > 0)
    {
        // Only the trailing chunk can be short; fill it up to the full size.
        chunk.AddRange(Enumerable.Repeat(default(T), chunkSize - chunk.Count));
        yield return chunk;
    }
}
Winston Smith
  • 21,585
  • 10
  • 60
  • 75
  • 2
    Note that this will enumerate `source` many times. So, for example, if it's a Linq to SQL query, you will execute the SQL query potentially hundreds of times! When writing methods like this for `IEnumerable`, it's desirable to only enumerate the sequence once. Check out [this implementation](http://stackoverflow.com/a/13710023/24874) to see what I mean. The OP was asking about a materialised collection of bytes where this is not such a problem, but others visiting this question may need to be aware of this distinction. – Drew Noakes Jul 27 '13 at 20:32
1
/// <summary>
/// Splits an array of bytes into a List&lt;byte[]&gt; holding consecutive
/// chunks of the original array. The final chunk is shorter when the array
/// length is not a multiple of the chunk size; if the chunk size is bigger than
/// the array, the list holds a single chunk that is a copy of the whole array.
/// </summary>
/// <param name="array">The array to split</param>
/// <param name="size">the size of the chunks; must be at least 1</param>
/// <returns>The chunks in their original order; empty for an empty array.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static List<byte[]> SplitArray(byte[] array, int size)
{
    if (array == null) throw new System.ArgumentNullException("array");
    // A non-positive size would never advance the offset and loop forever.
    if (size < 1) throw new System.ArgumentOutOfRangeException("size");

    List<byte[]> chunksList = new List<byte[]>();
    int offset = 0;

    while (offset < array.Length)
    {
        int remaining = array.Length - offset;
        int length = remaining < size ? remaining : size;

        // Copy the slice directly instead of re-walking the array with Skip/Take.
        byte[] chunk = new byte[length];
        System.Array.Copy(array, offset, chunk, 0, length);
        chunksList.Add(chunk);
        offset += length;
    }
    return chunksList;
}
takrl
  • 6,356
  • 3
  • 60
  • 69
timba
  • 11
  • 1
1
    const int x = 4;
var bytes = new List<byte>() { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

// Group elements by index / x. ToList() materializes the query: without it every
// enumeration of "groups" would build brand-new lists and the padding below would
// be applied to a throwaway copy.
var groups = bytes
    .Select((b, index) => new { b, index })
    .GroupBy(obj => obj.index / x)
    .Select(group => new List<byte>(group.Select(i => i.b)))
    .ToList();

// Only the last group can be short; LastOrDefault() keeps an empty input from throwing.
var last = groups.LastOrDefault();
while (last != null && last.Count < x)
{
    last.Add(0);
}
Steck
  • 1,171
  • 9
  • 19
  • 1
    A nice solution, but note that it is forced to buffer the entire sequence first - this *may* be perfectly fine in most common cases. – Marc Gravell Jul 09 '10 at 08:22
1

You could try this:

    List<byte> bytes = new List<byte>() { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

    int partLength = 4;

    // Number of parts, rounded up so that a trailing partial part gets its own list.
    int c = (bytes.Count + partLength - 1) / partLength;

    List<List<byte>> allLists = new List<List<byte>>();

    for (int i = 0; i < c; i++)
    {
        // Each part starts partLength elements after the previous one.
        int start = i * partLength;
        int remaining = bytes.Count - start;
        allLists.Add(bytes.GetRange(start, remaining < partLength ? remaining : partLength));
    }

    // Only the last list can be short; fill it with 0s up to partLength.
    int zerosNeeded = allLists.Count > 0 ? partLength - allLists[allLists.Count - 1].Count : 0;

    for (int i = 0; i < zerosNeeded; i++)
        allLists[allLists.Count - 1].Add(0);

Ask if anything is unclear.

Koynov
  • 1,499
  • 1
  • 10
  • 19
1

You'll want to take Marc Gravell's solution of course, but I couldn't resist hacking together a pure LINQ version, just to see if it can be done:

// Splits the input into arrays of chunkSize elements using LINQ operators only.
// A shorter final group is copied into a full-size array, so its tail holds default(T).
static IEnumerable<T[]> LinqChunks<T>(IEnumerable<T> input, int chunkSize)
{
  // Tag every element with the number of the chunk it belongs to.
  var numbered = input.Select((value, position) => new { Value = value, ChunkNr = position / chunkSize });

  // Collect the values of each chunk number into one group.
  var grouped = numbered.GroupBy(entry => entry.ChunkNr, entry => entry.Value);

  return grouped.Select(members =>
  {
    T[] items = members.ToArray();
    if (items.Length == chunkSize)
    {
      return items;
    }

    // Short group: this is the last chunk, so widen it to the full chunk size.
    T[] padded = new T[chunkSize];
    items.CopyTo(padded, 0);
    return padded;
  });
}
cfern
  • 5,956
  • 2
  • 25
  • 22
1

And if somebody wants a purely functional solution:

/// <summary>
/// Splits the input into arrays of exactly <paramref name="size"/> elements,
/// padding the final array with default(T) when the element count is not a
/// multiple of <paramref name="size"/>.
/// </summary>
/// <param name="input">The sequence to split; it is buffered once when the method is called.</param>
/// <param name="size">Length of every produced array; must be at least 1.</param>
/// <returns>The chunks in source order; no chunks for an empty input.</returns>
static IEnumerable<T[]> Chunkify<T>(IEnumerable<T> input, int size)
{
    if (input == null) throw new System.ArgumentNullException("input");
    if (size < 1) throw new System.ArgumentOutOfRangeException("size");

    // Buffer once so the source is not enumerated a second time for the count.
    var items = input.ToList();

    // The outer "% size" turns a padding of "size" into 0, so an exact multiple
    // does not get an extra chunk made purely of default values.
    var padding = (size - items.Count % size) % size;

    return items
        .Concat(Enumerable.Repeat(default(T), padding))
        .Select((x, i) => new { Value = x, Chunk = i / size })
        .GroupBy(x => x.Chunk, x => x.Value)
        .Select(x => x.ToArray());
}
Martin Jonáš
  • 2,309
  • 15
  • 12
0
// Lazily groups items into arrays of "size" elements. Whatever is left over at the
// end is returned as a final, shorter array (no padding).
static IEnumerable<T[]> Chunkify<T>(IEnumerable<T> items, int size)
{
    List<T> buffer = new List<T>(size);

    foreach (T element in items)
    {
        buffer.Add(element);
        if (buffer.Count != size)
        {
            continue;
        }

        // Buffer is full: snapshot it, reset it, then hand the snapshot out.
        T[] full = buffer.ToArray();
        buffer.Clear();
        yield return full;
    }

    if (buffer.Count != 0)
    {
        yield return buffer.ToArray();
    }
}
Karlas
  • 981
  • 6
  • 5
0

This answer is more for the case of IEnumerable, but the question is marked as duplicate of this.

There are many solutions, but none lazy enough for me. This one does the trick:

  /// <summary>
  /// A sequence of at most a fixed number of items that are pulled on demand through
  /// a callback and remembered, so repeated enumeration replays the cached items.
  /// </summary>
  private class CachedEnumeration<T> : IEnumerable<T>
  {
    /// <summary>
    /// Cursor over the owner's cache; asks the owner to fetch one more item only
    /// when it moves past what has been cached so far.
    /// </summary>
    class CachedEnumerator : IEnumerator<T>
    {
      private readonly CachedEnumeration<T> m_source;
      // -1 is the position before the first element, where an enumerator sits
      // until MoveNext is called for the first time
      private int m_index = -1;

      public CachedEnumerator(CachedEnumeration<T> source)
      {
        m_source = source;
      }

      public T Current { get { return m_source.m_items[m_index]; } }
      object System.Collections.IEnumerator.Current { get { return Current; } }

      public bool MoveNext()
      {
        int next = m_index + 1;
        // Serve from the cache when possible, otherwise try to fetch one more item.
        bool available = next < m_source.m_items.Count || m_source.FetchOne();
        if (available) m_index = next;
        return available;
      }

      public void Reset() { m_index = -1; }

      public void Dispose() { }
    }

    /// <summary>
    /// the items fetched so far
    /// </summary>
    private readonly List<T> m_items;
    /// <summary>
    /// callback that tries to fetch one item: (true, item) on success, (false, ignored) otherwise
    /// </summary>
    private readonly Func<Tuple<bool, T>> m_fetchMethod;
    /// <summary>
    /// number of items at which the enumeration counts as full
    /// </summary>
    private readonly int m_targetSize;

    public CachedEnumeration(int size, T firstItem, Func<Tuple<bool, T>> fetchMethod)
    {
      m_targetSize = size;
      m_fetchMethod = fetchMethod;
      m_items = new List<T>(size) { firstItem };
    }

    /// <summary>
    /// tells whether the enumeration already holds its target number of items
    /// </summary>
    public bool IsFull { get { return m_items.Count == m_targetSize; } }

    /// <summary>
    /// keeps fetching until the enumeration is full or the callback reports no more items
    /// </summary>
    public void FetchAll()
    {
      bool fetched;
      do
      {
        fetched = FetchOne();
      }
      while (fetched);
    }

    public IEnumerator<T> GetEnumerator()
    {
      return new CachedEnumerator(this);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
      return GetEnumerator();
    }

    private bool FetchOne()
    {
      if (IsFull) return false;
      Tuple<bool, T> fetched = m_fetchMethod();
      bool hasItem = fetched.Item1;
      if (hasItem) m_items.Add(fetched.Item2);
      return hasItem;
    }
  }
  /// <summary>
  /// partitions the <paramref name="source"/> to parts of size <paramref name="size"/>;
  /// the last part is shorter when the source runs out. Each part pulls its items from
  /// the source on demand and is completed before the next part is produced.
  /// </summary>
  /// <param name="source">the sequence to partition</param>
  /// <param name="size">the number of items per part, at least 1</param>
  /// <exception cref="ArgumentNullException"><paramref name="source"/> is null</exception>
  /// <exception cref="ArgumentException"><paramref name="size"/> is less than 1</exception>
  public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> source, int size)
  {
    if (source == null) throw new ArgumentNullException("source");
    if (size < 1) throw new ArgumentException(string.Format("The specified size ({0}) is invalid, it needs to be at least 1.", size), "size");
    // Dispose the source enumerator when iteration finishes or is abandoned.
    using (var enumerator = source.GetEnumerator())
    {
      while (enumerator.MoveNext())
      {
        // Read Current only after a successful MoveNext: some enumerators (arrays,
        // for example) throw when Current is read past the end of the sequence.
        var lastResult = new CachedEnumeration<T>(
          size,
          enumerator.Current,
          () => enumerator.MoveNext()
            ? Tuple.Create(true, enumerator.Current)
            : Tuple.Create(false, default(T)));
        try
        {
          yield return lastResult;
        }
        finally
        {
          // Runs both on normal resumption and when the consumer stops early, so a
          // part that was handed out is complete before the enumerator is disposed.
          lastResult.FetchAll();
        }
      }
    }
  }

You can find unit tests and the source here

Community
  • 1
  • 1
MBoros
  • 1,090
  • 7
  • 19
0
//without LINQ

List<byte> bytes = new List<byte>() { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
int x = 4;

List<byte[]> byteList = new List<byte[]>(); // contains answers

for (int i = 0; i < bytes.Count; i += x)
{
    // A new array is zero-filled, so a short final chunk is padded automatically
    // and the input list does not have to be modified.
    byte[] chunk = new byte[x];
    int remaining = bytes.Count - i;
    bytes.CopyTo(i, chunk, 0, remaining < x ? remaining : x);
    byteList.Add(chunk);
}
simonalexander2005
  • 4,338
  • 4
  • 48
  • 92
0

I've been solving something similar on my project and I came up with this nice looking solution:

dataAsIEnumerable => your source you want to split into batches

BatchSize => your size of batch

            // Materialize once: List<T> provides Count and GetRange.
            var dataAsList = dataAsIEnumerable.ToList();

            // Number of batches, rounded up so a trailing partial batch is not dropped.
            var batchCount = (dataAsList.Count + BatchSize - 1) / BatchSize;

            // not enough items, create at least one batch
            if (batchCount < 1)
                batchCount = 1;

            var batchAsSplit = new List<List<Model>>();

            for (int j = 0; j < batchCount; j++)
            {
                int start = j * BatchSize;
                int remaining = dataAsList.Count - start;
                // Every batch is full-size except possibly the last one.
                batchAsSplit.Add(dataAsList.GetRange(start, remaining > BatchSize ? BatchSize : remaining));
            }

            // NOTE(review): the lock spans the whole loop body, so the inserts run one at a
            // time despite Parallel.ForEach — confirm whether MyContent needs this.
            Parallel.ForEach(batchAsSplit, item =>
            {
                lock (MyContent)
                    MyContent.InsertBulk(item);
            });

The code first materializes the IEnumerable collection into a List, which provides the GetRange operation, and then builds the collection of batches from it. Finally, it bulk-saves each batch to MyContent (the database).

st35ly
  • 1,215
  • 18
  • 24