I am using a library that provides a Traversable[T] that pages through database results. I'd like to avoid loading the whole thing into memory, so I am trying to convert it to a Stream[T].
From what I can tell, the built in "asStream" method loads the whole Traversable into a Buffer, which defeats my purpose. My attempt (below) hits a StackOverflowException on large results, and I can't tell why. Can someone help me understand what is going on? Thanks!
def asStream[T](traversable: => Traversable[T]): Stream[T] = {
if (traversable.isEmpty) Empty
else {
lazy val head = traversable.head
lazy val tail = asStream(traversable.tail)
head #:: tail
}
}
Here's a complete example that reproduces this, based on a suggestion by @SCouto
import scala.collection.immutable.Stream.Empty
object StreamTest {
def main(args: Array[String]) = {
val bigVector = Vector.fill(90000)(1)
val optionStream = asStream(bigVector).map(v => Some(v))
val zipped = optionStream.zipAll(optionStream.tail, None, None)
}
def asStream[T](traversable: => Traversable[T]): Stream[T] = {
@annotation.tailrec
def loop(processed: => Stream[T], pending: => Traversable[T]): Stream[T] = {
if (pending.isEmpty) processed
else {
lazy val head = pending.head
lazy val tail = pending.tail
loop(processed :+ head, tail)
}
}
loop(Empty, traversable)
}
}
Edit: After some interesting ideas from @SCouto, I learned this could also be done with trampolines to keep the result as a Stream[T] that is in the original order
object StreamTest {
def main(args: Array[String]) = {
val bigVector = Range(1, 90000).toVector
val optionStream = asStream(bigVector).map(v => Some(v))
val zipped = optionStream.zipAll(optionStream.tail, None, None)
zipped.take(10).foreach(println)
}
def asStream[T](traversable: => Traversable[T]): Stream[T] = {
sealed trait Traversal[+R]
case class More[+R](result: R, next: () => Traversal[R]) extends Traversal[R]
case object Done extends Traversal[Nothing]
def next(currentTraversable: Traversable[T]): Traversal[T] = {
if (currentTraversable.isEmpty) Done
else More(currentTraversable.head, () => next(currentTraversable.tail))
}
def trampoline[R](body: => Traversal[R]): Stream[R] = {
def loop(thunk: () => Traversal[R]): Stream[R] = {
thunk.apply match {
case More(result, next) => Stream.cons(result, loop(next))
case Done => Stream.empty
}
}
loop(() => body)
}
trampoline(next(traversable))
}
}