6

I implemented a bidirectional iterator, however instead of operating on a data structure, it returns a mathematical series which one can iteratively calculate through in both directions. In fact, I'm iterating through the integers, using ++ and -- on an int. This means that the data is not stored in a different structure, and hence when the iterator goes out of scope, so does the value.

Nevertheless, I would expect the next code (minimal failing example) sample to work, as the iterator stays in scope the whole time. But it does not work :(

#include <iostream>
#include <iterator>
#include <vector>

class my_iterator : public std::iterator<std::bidirectional_iterator_tag, int> {
  int d_val = 12;
public:
  my_iterator  operator--(int) { std::cout << "decrement--\n"; return my_iterator(); }
  my_iterator &operator--()    { std::cout << "--decrement\n"; return *this; }
  my_iterator  operator++(int) { std::cout << "increment++\n"; return my_iterator(); }
  my_iterator &operator++()    { std::cout << "++increment\n"; return *this; }

  int &operator*() { std::cout << "*dereference\n"; return d_val; }

  bool operator==(my_iterator const  &o) { return false; }
  bool operator!=(my_iterator const  &o) { return true ; }
};


int main() {
  auto it = std::reverse_iterator<my_iterator>();
  int &i = *it;
  if (true)
  {
    std::cout << i << '\n';
  }
  else
  {
    std::vector<int> vec;
    vec.push_back(i);
    std::cout << vec[0] << '\n';
  }
}

source: http://ideone.com/YJKvpl

The if-branch results in memory violations, as properly detected by valgrind:

--decrement
*dereference
==7914== Use of uninitialised value of size 8
==7914==    at 0x4EC15C3: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4EC16FB: std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_int<long>(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4EC1C7C: std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::do_put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4ECEFB9: std::ostream& std::ostream::_M_insert<long>(long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x40087B: main (divine.cc:25)
==7914== 
==7914== Conditional jump or move depends on uninitialised value(s)
==7914==    at 0x4EC15CF: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4EC16FB: std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_int<long>(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4EC1C7C: std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::do_put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4ECEFB9: std::ostream& std::ostream::_M_insert<long>(long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x40087B: main (divine.cc:25)
==7914== 
==7914== Conditional jump or move depends on uninitialised value(s)
==7914==    at 0x4EC1724: std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_int<long>(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4EC1C7C: std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::do_put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x4ECEFB9: std::ostream& std::ostream::_M_insert<long>(long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20)
==7914==    by 0x40087B: main (divine.cc:25)
==7914== 
12
==7914== 
==7914== HEAP SUMMARY:
==7914==     in use at exit: 0 bytes in 0 blocks
==7914==   total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==7914== 
==7914== All heap blocks were freed -- no leaks are possible
==7914== 
==7914== For counts of detected and suppressed errors, rerun with: -v
==7914== Use --track-origins=yes to see where uninitialised values come from
==7914== ERROR SUMMARY: 5 errors from 3 contexts (suppressed: 0 from 0)

The else-branch doesn't result in memory violations, or at least as far as my valgrind can detect. However, the value stored in the vector is 'random':

--decrement
*dereference
-16777520

I'm a bit surprised by what happens. The iterator should be in scope all the time, yet the reference seems to get invalidated. Why do I get the memory violations, whilst 12 is printed or why don't I get them whilst something different from 12 is stored?

Herbert
  • 5,279
  • 5
  • 44
  • 69
  • I think the problem is not with `it` but with `*it`, subtle difference. If you added `const` to the reference it would work I'm sure. – Mark Ransom Feb 18 '15 at 22:50
  • Hi Mark, when I add const, it's not a bidirectional iterator anymore I'm afraid, and the reverse_iterator won't accept it. – Herbert Feb 18 '15 at 22:52
  • 3
    `reverse_iterator::operator*` returns a reference into a temporary in this case: It internally creates a *new* iterator object, decrements that, and returns what that new, decremented operator refers to (which is a data member of a local variable): [cppreference: `reverse_iterator::operator*`](http://en.cppreference.com/w/cpp/iterator/reverse_iterator/operator*) – dyp Feb 18 '15 at 22:52
  • Sorry, I meant `const int &i`, you evidently tried to add it to `it`. – Mark Ransom Feb 18 '15 at 22:55
  • @MarkRansom If I change **int &i** into **int const &i** or **const int &i** the behavior is the same. EDIT: only the else-branch behavior stays the same, the memory violations of the if-branch are gone. – Herbert Feb 18 '15 at 22:56
  • [Live example of the lifetime issue](http://coliru.stacked-crooked.com/a/dfd25528cd435662) – dyp Feb 18 '15 at 22:59
  • @dyp You mean **new my_iterator** I guess, does that mean that this is intrinsically determined to not work, since I have no way to make sure the references stay in scope long enough, as there is no underlying data structure storing all of them? – Herbert Feb 18 '15 at 23:01
  • It seems so: the iterators are supposed to have pointer semantics, not "value semantics", i.e. they're not supposed to own the values they're pointing to. – dyp Feb 18 '15 at 23:03
  • One way to do it, I guess, is using a shared_ptr, each time the cc is called, the shared_ptr is also cc'ed, however, each time the value is updated, the iterator with the value get's a fresh shared_ptr with the new value. – Herbert Feb 18 '15 at 23:19
  • How does that solve the problem? When the decrement happens inside `reverse_iterator::operator*`, you'd make a fresh `shared_ptr` for the copy, but then the copy gets destroyed, and the `shared_ptr` with it. You might be better off just "lying" and using `int` as `my_iterator::reference`. After all, the standard library also lies with `vector::iterator`, which is purportedly a random access iterator, but in reality isn't even a forward iterator. – T.C. Feb 19 '15 at 01:43
  • @T.C. It doesn't really, as iterators can not be made responsible for keeping the data the reference to in scope. However, using the shared_ptr's, the data stays in the same place when the iterator is copied or moved. It gets duplicated if the iterator is updated using ++/-- and hence may go out of scope if all the other iterators with the same shared_ptr do. So it partially solves the problem, but it's not Pandora's box. However, this requirement is enough for std::copy to work :) http://ideone.com/KLcfX5 – Herbert Feb 19 '15 at 10:55
  • @Herbert Nah, it's UB. Remember that the problem is with `Iterator tmp = current; return *--tmp;` in `operator *`'s body. After `--tmp` it has a new `shared_ptr` pointing to a new block of memory, and since it's now the only `shared_ptr` that owns the memory, the memory is freed when `tmp` is destoryed, and you still return a dangling reference. – T.C. Feb 19 '15 at 11:05
  • @Herbert You just got (un)lucky because the memory, though freed, happens to still contain the desired value. If you use something that [overwrites the stored value when it is destroyed](http://coliru.stacked-crooked.com/a/6ecb8333636f82b3) (or run it under valgrind) it will break. – T.C. Feb 19 '15 at 11:06
  • @T.C. You are right :( I think value iterators are to be considered evil. – Herbert Feb 19 '15 at 11:23

2 Answers2

10

reverse_iterator does not work with so-called "stashing iterators", iterators that returns references to things within themselves. The operator* of reverse_iterator makes a copy of the wrapped iterator, decrements it, and returns the result of dereferencing the copy. Hence, if dereferencing iterator returns a reference to something inside itself, the reference will become dangling.

An attempt was made in the C++11 specification to make it work, but it turns out that it's impossible to implement without adding massive overhead* for non-stashing iterators, so the specification was reverted to the C++03 version.


* To support "stashing iterators", an additional data member must be added that stores the decremented current iterator, doubling the size of reverse_iterator; and then some form of synchronization must be used since operator * is const - and so must be simultaneously callable from multiple threads without causing a data race - but must modify this additional data member. That's a lot of overhead to add to all reverse_iterators for such an uncommon use case.

T.C.
  • 133,968
  • 17
  • 288
  • 421
  • Well, the iterators are allowed to invalidate returned references and pointers after destruction ([iterator.requirements.general] /10). Since std::reverse_iterator returns references/pointers it obtained via destructed iterators (since it does not store the original iterator), it does not meet requirement [iterator.iterators]/2, since *it may return an invalid reference or pointer. Is this a leak in the standard, or am I missing something? Why is std::reverse_iterator not an iterator, as one would expect? – Herbert Feb 26 '15 at 15:02
  • 1
    @Herbert `reverse_iterator` is not a class. It's a class template. A specialization `reverse_iterator` is a perfectly valid iterator, as long as `sane_iterator`'s destructor does not invalidate pointers and references. I'm also not convinced that a stashing iterator is a valid forward iterator - [forward.iterators]/p6 says that "If `a` and `b` are both dereferenceable, then `a == b` if and only if `*a` and `*b` are bound to the same object." This implies that a copy of a stashing iterator cannot compare equal to the original, which in turn breaks CopyConstructible requirements. – T.C. Feb 27 '15 at 01:46
  • That is true, however my shared_ptr implementation would fix this: http://ideone.com/KLcfX5 However, std::reverse_iterator does not keep the copy in memory post-derement. Hence after the decrement, the reference is invalidated. – Herbert Feb 27 '15 at 10:22
  • @Herbert Not really. For forward iterators, `a == b` implies `++a == ++b`, but in your design, given `a` and `b` with a `shared_ptr` to the same thing, `++a` and `++b` will point to different things (as each will do an allocation separately), hence violating the requirement in [forward.iterators]/p6. – T.C. Feb 27 '15 at 15:15
1

As has been mentioned, the C++03 and C++14 standards define reverse_iterator::operator* in this way:

24.5.1.3.4 operator* [reverse.iter.op.star]

reference operator*() const;

1 Effects:

Iterator tmp = current;
return *--tmp;

tmp is destroyed after operator* returns, so any references to data stored inside of tmp will become invalid. The C++11 standard changed this and added a note:

24.5.1.3.4 operator* [reverse.iter.op.star]

reference operator*() const;

1 Effects:

deref_tmp = current;
--deref_tmp;
return *deref_tmp;

2 [ Note: This operation must use an auxiliary member variable rather than a temporary variable to avoid returning a reference that persists beyond the lifetime of its associated iterator. (See 24.2.) —end note ]

This is actually impossible to implement because of the const qualifier on operator*, so the wording was reverted between C++11 and C++14.

The best solution is probably to just implement your own version of reverse_iterator based on the C++11 wording for whatever version of C++ you are targeting. Fortunately, the specification is very straightforward and easy to follow. As a working example, here's one I wrote for C++14:

template <class Iterator>
class stashing_reverse_iterator :
  public std::iterator<
    typename std::iterator_traits<Iterator>::iterator_category,
    typename std::iterator_traits<Iterator>::value_type,
    typename std::iterator_traits<Iterator>::difference_type,
    typename std::iterator_traits<Iterator>::pointer,
    typename std::iterator_traits<Iterator>::reference
  > {
  typedef std::iterator_traits<Iterator> traits_type;
public:
  typedef Iterator iterator_type;
  typedef typename traits_type::difference_type difference_type;
  typedef typename traits_type::reference       reference;
  typedef typename traits_type::pointer         pointer;

  stashing_reverse_iterator() : current() {}

  explicit stashing_reverse_iterator(Iterator x) : current(x) {}

  template <class U>
  stashing_reverse_iterator(const stashing_reverse_iterator<U>& u) : current(u.current) {}

  template <class U>
  stashing_reverse_iterator& operator=(const stashing_reverse_iterator<U>& u) {
    current = u.base();
    return *this;
  }

  Iterator base() const {
    return current;
  }

  // Differs from reverse_iterator::operator*:
  // 1. const qualifier removed
  // 2. current iterator is stored in a member field to ensure references are
  //    always valid after this function returns
  reference operator*() {
    deref_tmp = current;
    --deref_tmp;
    return *deref_tmp;
  }

  pointer operator->() const {
    return std::addressof(operator*());
  }

  stashing_reverse_iterator& operator++() {
    --current;
    return *this;
  }

 stashing_reverse_iterator operator++(int) {
    stashing_reverse_iterator tmp = *this;
    --current;
    return tmp;
  }

  stashing_reverse_iterator& operator--() {
    ++current;
    return *this;
  }

  stashing_reverse_iterator operator--(int) {
    stashing_reverse_iterator tmp = *this;
    ++current;
    return tmp;
  }

  stashing_reverse_iterator  operator+ (difference_type n) const {
    return stashing_reverse_iterator(current - n);
  }

  stashing_reverse_iterator& operator+=(difference_type n) {
    current -= n;
    return *this;
  }

  stashing_reverse_iterator  operator- (difference_type n) const {
    return stashing_reverse_iterator(current + n);
  }

  stashing_reverse_iterator& operator-=(difference_type n) {
    current += n;
    return *this;
  }

  // Differs from reverse_iterator::operator[]:
  // 1. const qualifier removed because this function makes use of operator*
  reference operator[](difference_type n) {
    return *(*this + n);
  }

protected:
  Iterator current;
private:
  Iterator deref_tmp;
};

template <class Iterator1, class Iterator2>
bool operator==(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return x.base() == y.base(); }

template <class Iterator1, class Iterator2>
bool operator<(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return x.base() > y.base(); }

template <class Iterator1, class Iterator2>
bool operator!=(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return !(x.base() == y.base()); }

template <class Iterator1, class Iterator2>
bool operator>(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return x.base() < y.base(); }

template <class Iterator1, class Iterator2>
bool operator>=(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return x.base() <= y.base(); }

template <class Iterator1, class Iterator2>
bool operator<=(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y)
{ return x.base() >= y.base(); }

template <class Iterator1, class Iterator2>
auto operator-(
  const stashing_reverse_iterator<Iterator1>& x,
  const stashing_reverse_iterator<Iterator2>& y) -> decltype(y.base() - x.base())
{ return y.base() - x.base(); }

template <class Iterator>
stashing_reverse_iterator<Iterator> operator+(
  typename stashing_reverse_iterator<Iterator>::difference_type n,
  const stashing_reverse_iterator<Iterator>& x)
{ return stashing_reverse_iterator<Iterator>(x.base() - n); }

template <class Iterator>
stashing_reverse_iterator<Iterator> make_stashing_reverse_iterator(Iterator i)
{ return stashing_reverse_iterator<Iterator>(i); }

Usage is the same as reverse_iterator:

// prints 5,4,3,2,1, for a sanely implemented number_iterator
std::copy(
  make_stashing_reverse_iterator(number_iterator(5)),
  make_stashing_reverse_iterator(number_iterator(0)),
  std::ostream_iterator<int>(std::cout, ","));
J.C. Moyer
  • 11
  • 2
  • Hi, could you please clarify the license of your code snippet? I see that the whole stackoverflow is CC-BY-SA 3.0; can you maybe re-license this under BSD or Apache or something similar? – Jan Kundrát Feb 05 '17 at 11:54