There's a bug in Traverse()
that's causing it to iterate nodes more than once.
Bugged Code
public IEnumerable<HtmlNode> Traverse()
{
foreach (var node in _context)
{
yield return node;
foreach (var child in Children().Traverse())
yield return child;
}
}
public SharpQuery Children()
{
return new SharpQuery(_context.SelectMany(n => n.ChildNodes).Where(n => n.NodeType == HtmlNodeType.Element), this);
}
public SharpQuery(IEnumerable<HtmlNode> nodes, SharpQuery previous = null)
{
if (nodes == null) throw new ArgumentNullException("nodes");
_previous = previous;
_context = new List<HtmlNode>(nodes);
}
Test Code
static void Main(string[] args)
{
var sq = new SharpQuery(@"
<a>
<b>
<c/>
<d/>
<e/>
<f>
<g/>
<h/>
<i/>
</f>
</b>
</a>");
var nodes = sq.Traverse();
Console.WriteLine("{0} nodes: {1}", nodes.Count(), string.Join(",", nodes.Select(n => n.Name)));
Console.ReadLine();
Output
19 nodes: #document,a,b,c,g,h,i,d,g,h,i,e,g,h,i,f,g,h,i
Expected Output
Each letter a-i printed once.
Can't seem to figure out where's it going wrong... node.ChildNodes
does return just direct children, right? (from HtmlAgilityPack)
Full class (condensed) if you want to try and run it yourself.
public class SQLite
{
private readonly List<HtmlNode> _context = new List<HtmlNode>();
private readonly SQLite _previous = null;
public SQLite(string html)
{
var doc = new HtmlDocument();
doc.LoadHtml(html);
_context.Add(doc.DocumentNode);
}
public SQLite(IEnumerable<HtmlNode> nodes, SQLite previous = null)
{
if (nodes == null) throw new ArgumentNullException("nodes");
_previous = previous;
_context = new List<HtmlNode>(nodes);
}
public IEnumerable<HtmlNode> Traverse()
{
foreach (var node in _context)
{
yield return node;
foreach (var child in Children().Traverse())
yield return child;
}
}
public SQLite Children()
{
return new SQLite(_context.SelectMany(n => n.ChildNodes).Where(n => n.NodeType == HtmlNodeType.Element), this);
}
}