3

I'm using a method that performs a long computation and has many results to return, but the correct result might be any one of them. Let's say the correct one turns up after 300,000 results; the remaining 700,000 would then never need to be computed. Whether a returned result is the correct one is checked by the following code in Main:

// A method that is supposed to return values on demand.
// Main might need only a few of the results rather than all of them, so they are yielded one at a time.
static IEnumerable<int> foo() {
    // Long recursive process; might produce over 1 million results if asked to yield all of them.
    // NOTE: `ret` is a placeholder for each computed result; it is not defined in this snippet.
    yield return ret;
}

// Consumes foo() in groups of 300 and stops as soon as one result passes the check.
static void Main(string[] args) {
    var a = foo();
    while (true) {
        // Take(300) is applied to `a` itself on every pass, so each foreach below enumerates
        // foo() from its first element again: the same first 300 results are recomputed each time.
        var p = a.Take(300); // takes the first 300 on every iteration of the while-loop
        foreach (var c in p) {
            // does something with it
            // NOTE: `bar` is a placeholder for the outcome of the check; it is not defined in this snippet.
            if (bar == true) // if it is the right one:
                goto _break;            
        }
    }
    // Jump target used to leave both loops at once.
    _break:
    Console.Read(); // pause
}

Unfortunately the code recalculates the 300 returns again and again.

My Question

How can I pull out only the next 300 results each time, without having to start from the beginning again (using Skip(n) and then Take(n)) and without converting the sequence into a collection, while keeping the IEnumerable structure of the function foo?

What Am I Trying To Achieve?

Before I began using yield, I had a straightforward but inefficient linear procedure, which turned out to be faster than the new one. Nothing really changed except for separating the content of foo() into an external method, so that I can yield the results one by one instead of computing them all first and only then processing them. And yet the performance was quite horrible: it went from 300ms to 700ms. I noticed that even asking for all of the results (foo().ToArray()) is faster than using yield return while checking whether bar == true.

So what I would like to do is take 300->sample them, if not found->continue taking 300's till found.

Illustrating Code

// Minimal reproduction: intended to print 1, 2, 3, 4 and stop, but never gets past 3.
static void Main(string[] args) {
    var a = loly();
    while(true){
        // Take(3) is applied to `a` itself on every pass, so each foreach below starts
        // loly() over from its first element and only ever sees 1, 2, 3.
        var p = a.Take(3);
        foreach (var c in p) {
            Console.Write(c);
            // Never true here: 4 is the fourth element and Take(3) stops after the third.
            if (c==4)
                goto _break;
        }
    }

    // Jump target used to leave both loops at once.
    _break:
    Console.Read();
}

/// <summary>
/// Lazily yields the integers 1 through 9, in ascending order.
/// </summary>
static IEnumerable<int> loly() {
    int[] digits = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    foreach (int digit in digits) {
        yield return digit;
    }
}

This outputs: 123123123 and so on

Answers Put To Practice

/// <summary>
/// Demo driver that runs both partitioning approaches from the answers against <c>loly()</c>.
/// </summary>
class Program {
    static void Main(string[] args) {
        var printed = 0;

        // Approach 1: pull groups of three from one shared enumerator until it reports the end.
        var partitioner = new EnumerationPartitioner<int>(loly().GetEnumerator());
        do {
            foreach (var item in partitioner.Pull(3)) {
                Console.WriteLine(item);
                Console.WriteLine("(" + (++printed) + ")");
            }
        } while (!partitioner.Ended);

        // Approach 2: let the extension method cut a fresh enumeration into batches of seven.
        foreach (var batch in loly().ToInMemoryBatches(7)) {
            foreach (var item in batch) {
                Console.WriteLine(item);
                Console.WriteLine("(" + (++printed) + ")");
            }
        }

        Console.Read();
    }

    /// <summary>
    /// Lazily yields the integers 1 through 9, in ascending order.
    /// </summary>
    static IEnumerable<int> loly() {
        int[] digits = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        foreach (int digit in digits) {
            yield return digit;
        }
    }
}

//Tallseth's method
public static class EnumerationPartitioner {
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive in-memory batches of
    /// <paramref name="batchSize"/> items. The final batch may be shorter.
    /// The source is enumerated exactly once, lazily, as the batches are consumed.
    /// </summary>
    /// <param name="source">Sequence to split.</param>
    /// <param name="batchSize">Maximum number of items per batch; must be greater than zero.</param>
    /// <returns>A lazy sequence of batches, each backed by its own list.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> ToInMemoryBatches<T>(this IEnumerable<T> source, int batchSize) {
        // Validate here rather than inside the iterator so bad arguments fail at the call site
        // instead of on the first MoveNext().
        if (source == null) {
            throw new System.ArgumentNullException("source");
        }
        // A non-positive size could never be reached by batch.Count, which previously meant
        // the whole source silently ended up in a single unbounded batch.
        if (batchSize <= 0) {
            throw new System.ArgumentOutOfRangeException("batchSize", batchSize, "Batch size must be greater than zero.");
        }

        return ToInMemoryBatchesIterator(source, batchSize);
    }

    // Iterator half of ToInMemoryBatches; assumes its arguments were already validated.
    private static IEnumerable<IEnumerable<T>> ToInMemoryBatchesIterator<T>(IEnumerable<T> source, int batchSize) {
        List<T> batch = null;
        foreach (var item in source) {
            if (batch == null) {
                batch = new List<T>();
            }

            batch.Add(item);

            if (batch.Count == batchSize) {
                yield return batch;
                // Start a new list next time so the batch already handed out is never mutated.
                batch = null;
            }
        }

        // Emit the trailing partial batch, if any.
        if (batch != null) {
            yield return batch;
        }
    }
}

//MarcinJuraszek's method
/// <summary>
/// Wraps a single enumerator so that items can be pulled from it in groups of a chosen size,
/// each pull continuing where the previous one stopped.
/// </summary>
public class EnumerationPartitioner<T> : IEnumerable<T>, System.IDisposable {

    /// <summary>
    /// Set once the wrapped enumerator has reported that it has no more items.
    /// </summary>
    private bool over = false;

    /// <summary>
    /// Has the enumeration ended?
    /// </summary>
    public bool Ended {
        get { return over; }
    }

    /// <summary>
    /// The wrapped enumerator. Every pull advances this same instance.
    /// </summary>
    public IEnumerator<T> Enumerator { get; private set; }

    /// <summary>
    /// Creates a partitioner over an existing enumerator.
    /// </summary>
    /// <param name="_enum">Enumerator to pull items from.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="_enum"/> is null.</exception>
    public EnumerationPartitioner(IEnumerator<T> _enum) {
        // Fail here instead of with a NullReferenceException on the first Pull.
        if (_enum == null) {
            throw new System.ArgumentNullException("_enum");
        }
        Enumerator = _enum;
    }

    /// <summary>
    /// Pulls up to <paramref name="count"/> items out of the <see cref="Enumerator"/>.
    /// </summary>
    /// <param name="count">Number of items to pull out of the <see cref="Enumerator"/>.</param>
    /// <returns>
    /// The pulled items. The list is shorter than <paramref name="count"/> (possibly empty)
    /// when the enumerator runs out, in which case <see cref="Ended"/> becomes true.
    /// </returns>
    public List<T> Pull(int count) {
        var pulled = new List<T>();
        if (over) {
            return pulled;
        }

        for (int i = 0; i < count; i++) {
            if (!Enumerator.MoveNext()) {
                over = true;
                break;
            }
            pulled.Add(Enumerator.Current);
        }
        return pulled;
    }

    /// <summary>
    /// Resets the Enumerator and clears the ended flag, use this over manual reset.
    /// </summary>
    /// <exception cref="System.NotSupportedException">
    /// The wrapped enumerator does not support resetting; enumerators produced by
    /// <c>yield return</c> methods do not.
    /// </exception>
    public void Reset() {
        // Reset the source first so a failed reset leaves the ended flag untouched.
        Enumerator.Reset();
        over = false;
    }

    /// <summary>
    /// Disposes the wrapped enumerator. Later pulls return empty lists.
    /// </summary>
    public void Dispose() {
        over = true;
        Enumerator.Dispose();
    }

    /// <summary>
    /// Returns the wrapped enumerator itself, so enumeration continues from its current position.
    /// </summary>
    public IEnumerator<T> GetEnumerator() {
        return Enumerator;
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
        return Enumerator;
    }
}
Community
  • 1
  • 1
NucS
  • 619
  • 8
  • 21
  • I actually use them quite often > – NucS Jan 13 '14 at 03:09
  • Why process 300 at a time? Why not just one at a time since you are looking for a specific item in the entire enumeration? – myermian Jan 13 '14 at 03:20
  • possible duplicate of [Divide array into an array of subsequence array](http://stackoverflow.com/questions/3210824/divide-array-into-an-array-of-subsequence-array) – Alexei Levenkov Jan 13 '14 at 03:22
  • @m-y Please read the section "What Am I Trying To Achive?" and Alexei, It's not what I meant – NucS Jan 13 '14 at 03:25
  • Hmmm... I thought you wanted to go through sequence in 300 element chunks - I'm lost on what you wanted, sorry... Would you mind posting code (non-compilable) that explains what you trying to achieve? – Alexei Levenkov Jan 13 '14 at 03:34
  • @NucS: I read what you wrote in the "why" section. If your return type is `IEnumerable` and you `Take(300)` on the enumeration you are calling into the method 300 times, then reading through those subset values (you are *possibly* iterating elements twice). From the answers Alexei guessed that you probably want an enumeration of enumerations, hopefully each enumeration subset is generated through a single call (such as making one SP call instead of the same one 300 times). Your question just wasn't worded as such. – myermian Jan 13 '14 at 03:41

3 Answers3

5

I've needed to do this regularly. As Alexei alludes to, an enumerable of enumerable is the thing I've wanted when dealing with this shape of problem.

    /// <summary>
    /// Splits <paramref name="source"/> into consecutive in-memory batches of
    /// <paramref name="batchSize"/> items. The final batch may be shorter.
    /// The source is enumerated exactly once, lazily, as the batches are consumed.
    /// </summary>
    /// <param name="source">Sequence to split.</param>
    /// <param name="batchSize">Maximum number of items per batch; must be greater than zero.</param>
    /// <returns>A lazy sequence of batches, each backed by its own list.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> ToInMemoryBatches<T>(this IEnumerable<T> source, int batchSize)
    {
        // Validate here rather than inside the iterator so bad arguments fail at the call site
        // instead of on the first MoveNext().
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }
        // A non-positive size could never be reached by batch.Count, which previously meant
        // the whole source silently ended up in a single unbounded batch.
        if (batchSize <= 0)
        {
            throw new System.ArgumentOutOfRangeException("batchSize", batchSize, "Batch size must be greater than zero.");
        }

        return ToInMemoryBatchesIterator(source, batchSize);
    }

    // Iterator half of ToInMemoryBatches; assumes its arguments were already validated.
    private static IEnumerable<IEnumerable<T>> ToInMemoryBatchesIterator<T>(IEnumerable<T> source, int batchSize)
    {
        List<T> batch = null;
        foreach (var item in source)
        {
            if (batch == null)
            {
                batch = new List<T>();
            }

            batch.Add(item);

            if (batch.Count == batchSize)
            {
                yield return batch;
                // Start a new list next time so the batch already handed out is never mutated.
                batch = null;
            }
        }

        // Emit the trailing partial batch, if any.
        if (batch != null)
        {
            yield return batch;
        }
    }
tallseth
  • 3,635
  • 1
  • 23
  • 24
  • +1 Similar to sample from [linked chinking question](http://stackoverflow.com/questions/3210824/divide-array-into-an-array-of-subsequence-array). Note that one can even `yield return` inner sub-sequences if really wanted with some more code... But apparently chucking this way is not what OP needs. – Alexei Levenkov Jan 13 '14 at 03:37
  • Hmm, I see that now. I'm confused in that case, guess we'll wait for clarification. – tallseth Jan 13 '14 at 03:41
  • Side note check out Jon Skeet's answer http://stackoverflow.com/questions/21099852/inside-of-using-block-need-to-dispose-and-reinstantiate#21100041 that shows off MoreLINQ `Batch` extension. – Alexei Levenkov Jan 13 '14 at 19:52
2

You can use the enumerator directly instead of relying on foreach loop:

// Reads loly() in partitions of `partitionSize` through one shared enumerator, so the
// sequence is walked exactly once and every partition continues where the last one stopped.
static void Main(string[] args)
{
    var a = loly();
    var partitionSize = 3;

    using (var enumerator = a.GetEnumerator())
    {
        // One buffer, cleared and refilled for each partition.
        var values = new List<int>(partitionSize);
        var exhausted = false;

        // Keep going until the source runs dry instead of assuming a fixed number of partitions.
        while (!exhausted)
        {
            values.Clear();
            while (values.Count < partitionSize)
            {
                if (!enumerator.MoveNext())
                {
                    exhausted = true;
                    break;
                }
                values.Add(enumerator.Current);
            }

            // The last partition may be short, or empty when the item count is an exact multiple.
            foreach (var c in values)
            {
                Console.Write(c);
            }
        }
    }

    Console.Read();
}
MarcinJuraszek
  • 124,003
  • 15
  • 196
  • 263
0

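I have made two methods. The difference from the other answers is that the partition sizes are not fixed: one method takes the partition sizes, and the other takes the end index of each partition. Also, if the last partition is not full, its array is resized to the number of items it actually contains.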

    /// <summary>
    /// Splits <paramref name="source"/> into consecutive arrays whose lengths are given by
    /// <paramref name="sizes"/>. Stops when either the sizes or the source run out; the last
    /// array is trimmed if the source ends in the middle of a partition.
    /// </summary>
    /// <param name="source">Sequence to split; enumerated once, lazily.</param>
    /// <param name="sizes">Length of each partition, in order. Every entry must be greater than zero.</param>
    /// <returns>A lazy sequence of partitions.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="source"/> or <paramref name="sizes"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// Thrown during enumeration when a partition that still has items to receive has a size of zero or less.
    /// </exception>
    public static IEnumerable<T[]> PartitionBySize<T>(this IEnumerable<T> source, int[] sizes)
    {
        // Validate here rather than inside the iterator so null arguments fail at the call site.
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }
        if (sizes == null)
        {
            throw new System.ArgumentNullException("sizes");
        }

        return PartitionBySizeIterator(source, sizes);
    }

    // Iterator half of PartitionBySize; assumes its arguments are non-null.
    private static IEnumerable<T[]> PartitionBySizeIterator<T>(IEnumerable<T> source, int[] sizes)
    {
        using (var iter = source.GetEnumerator())
        {
            foreach (var size in sizes)
            {
                // Probe for a first item before allocating, so an exhausted source ends the sequence.
                if (!iter.MoveNext())
                {
                    yield break;
                }

                // Checked only once an item is waiting: a bad size after the source has ended
                // is never reached, exactly as before. This replaces the former
                // IndexOutOfRangeException / OverflowException with a descriptive error.
                if (size <= 0)
                {
                    throw new System.ArgumentOutOfRangeException("sizes", size, "Every partition size must be greater than zero.");
                }

                var chunk = new T[size];
                chunk[0] = iter.Current;
                int filled = 1;
                while (filled < size && iter.MoveNext())
                {
                    chunk[filled] = iter.Current;
                    filled++;
                }

                // The source ended mid-partition: shrink the array to what was actually read.
                if (filled < size)
                {
                    Array.Resize(ref chunk, filled);
                }
                yield return chunk;
            }
        }
    }

    /// <summary>
    /// Splits <paramref name="source"/> into consecutive arrays, where each entry of
    /// <paramref name="indexes"/> is the zero-based position of the last item of a partition.
    /// Stops when either the indexes or the source run out; the last array is trimmed if the
    /// source ends in the middle of a partition.
    /// </summary>
    /// <param name="source">Sequence to split; enumerated once, lazily.</param>
    /// <param name="indexes">Inclusive end index of each partition; must be non-negative and strictly increasing.</param>
    /// <returns>A lazy sequence of partitions.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="source"/> or <paramref name="indexes"/> is null.</exception>
    /// <exception cref="System.ArgumentException">
    /// Thrown during enumeration when an index that still has items to receive is not greater than the previous one.
    /// </exception>
    public static IEnumerable<T[]> PartitionByIdx<T>(this IEnumerable<T> source, int[] indexes)
    {
        // Validate here rather than inside the iterator so null arguments fail at the call site.
        if (source == null)
        {
            throw new System.ArgumentNullException("source");
        }
        if (indexes == null)
        {
            throw new System.ArgumentNullException("indexes");
        }

        return PartitionByIdxIterator(source, indexes);
    }

    // Iterator half of PartitionByIdx; assumes its arguments are non-null.
    private static IEnumerable<T[]> PartitionByIdxIterator<T>(IEnumerable<T> source, int[] indexes)
    {
        // Starting at -1 makes the first partition cover positions 0..indexes[0].
        int last = -1;
        using (var iter = source.GetEnumerator())
        {
            foreach (var idx in indexes)
            {
                int size = idx - last;
                last = idx;

                // Probe for a first item before allocating, so an exhausted source ends the sequence.
                if (!iter.MoveNext())
                {
                    yield break;
                }

                // Checked only once an item is waiting: a bad index after the source has ended
                // is never reached, exactly as before. This replaces the former
                // IndexOutOfRangeException / OverflowException with a descriptive error.
                if (size <= 0)
                {
                    throw new System.ArgumentException("Indexes must be non-negative and strictly increasing.", "indexes");
                }

                var chunk = new T[size];
                chunk[0] = iter.Current;
                int filled = 1;
                while (filled < size && iter.MoveNext())
                {
                    chunk[filled] = iter.Current;
                    filled++;
                }

                // The source ended mid-partition: shrink the array to what was actually read.
                if (filled < size)
                {
                    Array.Resize(ref chunk, filled);
                }
                yield return chunk;
            }
        }
    }
watbywbarif
  • 6,487
  • 8
  • 50
  • 64