8

Function to be refactored...

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {
    return allItems.stream()
            .filter(item -> !usedItems.contains(item))
            .sorted((o1, o2) -> new Random().nextInt(2) - 1)
            .findFirst()
            .orElseThrow(() -> new RuntimeException("Did not find item!"));
}

Function might be used like this...

System.out.println(
            notUsedRandomItem(
                    Arrays.asList(1, 2, 3, 4), 
                    Arrays.asList(1, 2)
            )
    ); // Should print either 3 or 4

Edit: Collected suggested implementations and tested efficiency by running them against Person lists.

edit2: Added missing equals method to Person class.

import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;

class Functions {

    <T> T notUsedRandomItemOriginal(List<T> allItems, List<T> usedItems) {
        return allItems.stream()
                .filter(item -> !usedItems.contains(item))
                .sorted((o1, o2) -> new Random().nextInt(2) - 1)
                .findFirst()
                .orElseThrow(() -> new RuntimeException("Did not find item!"));
    }

    <T> T notUsedRandomItemByAominè(List<T> allItems, List<T> usedItems) {
        List<T> distinctItems = allItems.stream()
                .filter(item -> !usedItems.contains(item))
                .collect(toList());

        if (distinctItems.size() == 0) throw new RuntimeException("Did not find item!");

        return distinctItems.get(new Random().nextInt(distinctItems.size()));
    }

    <T> T notUsedRandomItemByEugene(List<T> allItems, List<T> usedItems) {

        // this is only needed because your input List might not support removeAll
        List<T> left = new ArrayList<>(allItems);
        List<T> right = new ArrayList<>(usedItems);

        left.removeAll(right);

        return left.get(new Random().nextInt(left.size()));
    }

    <T> T notUsedRandomItemBySchaffner(List<T> allItems, List<T> usedItems) {

        Set<T> used = new HashSet<>(usedItems);
        List<T> all = new ArrayList<>(allItems);

        Collections.shuffle(all);

        for (T item : all) if (!used.contains(item)) return item;

        throw new RuntimeException("Did not find item!");
    }
}

public class ComparingSpeedOfNotUsedRandomItemFunctions {

    public static void main(String[] plaa) {
        runFunctionsWith(100);
        runFunctionsWith(1000);
        runFunctionsWith(10000);
        runFunctionsWith(100000);
        runFunctionsWith(200000);
    }

    static void runFunctionsWith(int itemCount) {

        TestConfiguration testConfiguration = new TestConfiguration();
        Functions functions = new Functions();

        System.out.println("Function execution time with " + itemCount + " items...");

        System.out.println("Schaffner: " +
                testConfiguration.timeSpentForFindingNotUsedPeople(
                        itemCount, (allPeople, usedPeople) ->
                                functions.notUsedRandomItemBySchaffner(allPeople, usedPeople)
                ));

        System.out.println("Eugene: " +
                testConfiguration.timeSpentForFindingNotUsedPeople(
                        itemCount, (allPeople, usedPeople) ->
                                functions.notUsedRandomItemByEugene(allPeople, usedPeople)
                ));

        System.out.println("Aominè: " +
                testConfiguration.timeSpentForFindingNotUsedPeople(
                        itemCount, (allPeople, usedPeople) ->
                                functions.notUsedRandomItemByAominè(allPeople, usedPeople)
                ));

        System.out.println("Original: " +
                testConfiguration.timeSpentForFindingNotUsedPeople(
                        itemCount, (allPeople, usedPeople) ->
                                functions.notUsedRandomItemOriginal(allPeople, usedPeople)
                ));
    }

}

class TestConfiguration {

    Long timeSpentForFindingNotUsedPeople(int numberOfPeople, BiFunction<List<Person>, List<Person>, Person> function) {

        ArrayList<Person> people = new ArrayList<>();
        IntStream.range(1, numberOfPeople).forEach(i -> people.add(new Person()));
        Collections.shuffle(people);

        List<Person> halfOfPeople =
                people.stream()
                        .limit(numberOfPeople / 2)
                        .collect(Collectors.toList());

        Collections.shuffle(halfOfPeople);

        long before = System.nanoTime();
        Person foundItem = function.apply(people, halfOfPeople);
        long after = System.nanoTime();

        // Return -1 if function do not return valid answer
        if (halfOfPeople.contains(foundItem))
            return (long) -1;

        return TimeUnit.MILLISECONDS.convert(after - before, TimeUnit.NANOSECONDS);
    }

    class Person {
        public final String name = UUID.randomUUID().toString();

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;

            Person person = (Person) o;

            return name != null ? name.equals(person.name) : person.name == null;
        }

        @Override
        public int hashCode() {
            return name != null ? name.hashCode() : 0;
        }
    }
}

Results:

Function execution time with 100 items...
Schaffner: 0
Eugene: 1
Aominè: 2
Original: 5
Function execution time with 1000 items...
Schaffner: 0
Eugene: 14
Aominè: 13
Original: 5
Function execution time with 10000 items...
Schaffner: 2
Eugene: 564
Aominè: 325
Original: 348
Function execution time with 20000 items...
Schaffner: 3
Eugene: 1461
Aominè: 1418
Original: 1433
Function execution time with 30000 items...
Schaffner: 3
Eugene: 4616
Aominè: 2832
Original: 4567
Function execution time with 40000 items...
Schaffner: 4
Eugene: 10889
Aominè: 4903
Original: 10394

Conclusion

When list size reach 10000 items then so far only Schaffner's implementation is usable.

And because it's fairly simple to read I will pick it as the most elegant solution.

Ari Aaltonen
  • 129
  • 8
  • 2
    Does it have to be a list? It might be a lot more efficient/easier to work with Set. – Artur Biesiadowski Dec 20 '17 at 20:05
  • 1
    that is one interesting `Comparator`... – Eugene Dec 20 '17 at 20:08
  • You say elegant. If you really mean efficient, it would be more efficient to select a random number based on the count and grab by index, rather than rearranging the collection. – N-ate Dec 20 '17 at 20:09
  • What is the reason for the stream requirement? And read [the comments on this answer](https://stackoverflow.com/a/31703655/7653073), they strongly discourage using a broken comparator. Read for similar questions [here](https://stackoverflow.com/questions/31703226/how-to-get-random-objects-from-a-stream) and [here](https://stackoverflow.com/questions/28651908/perform-operation-on-n-random-distinct-elements-from-collection-using-streams-ap/28671411#28671411) – Malte Hartwig Dec 20 '17 at 21:02
  • @MalteHartwig No reason to use streams but implementation must be based on standard java 8 library. – Ari Aaltonen Dec 20 '17 at 21:52
  • 1
    @N-ate By elegant solution I'm looking for implementation which is both efficient and easy to understand. Sometimes it's tricky to find solution which has the right balance between those two. – Ari Aaltonen Dec 20 '17 at 21:56
  • @ArturBiesiadowski Yes it could be set also. – Ari Aaltonen Dec 20 '17 at 21:57
  • 1
    I see, I'd recommend not using streams then, and either going with Eugene's answer or something `shuffle`-based from the questions I linked to. `Collection.removeAll()` will make it much easier to understand. – Malte Hartwig Dec 20 '17 at 21:57
  • Have you considered a one-time shuffle instead of repeatedly trying to find a random unique item? – shmosel Dec 20 '17 at 23:42
  • Your `Person` class doesn't implement the `equals` method, meaning all people is different, while with the lists of numbers you have a list that is contained into the other one. This means that your test is flawed, which explains the time difference. – fps Dec 21 '17 at 02:03
  • you have to understand that your tests *should* be made with `JMH` - unfortunately I have no time to deal with this. https://stackoverflow.com/questions/504103/how-do-i-write-a-correct-micro-benchmark-in-java – Eugene Dec 21 '17 at 08:09
  • Eugene thanks for tip but I'm pretty sure this kind of time measurement already show enough data to make conclusion which of the implementation is fastest one. – Ari Aaltonen Dec 21 '17 at 08:40

3 Answers3

4

I can think of this, but no idea what-so-ever how it will scale compared to your existing solution:

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {

    // this is only needed because your input List might not support removeAll
    List<T> left = new ArrayList<>(allItems);
    List<T> right = new ArrayList<>(usedItems);

    left.removeAll(right);

    return left.get(new Random().nextInt(left.size()));

}

One thing to keep in mind is that sorted is a stateful operation, so it will sort the entire "diff", but you only retrieve one element from that. Also your Comparator is wrong, for the same two values o1 and o2 you might say they are different - this can break in mysterious ways.

Eugene
  • 117,005
  • 15
  • 201
  • 306
3

You should use HashSets to improve performance:

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {

    Set<T> used = new HashSet<>(usedItems);
    Set<T> all = new HashSet<>(allItems);

    all.removeIf(used::contains); // or all.removeAll(used)

    if (all.isEmpty()) throw new RuntimeException("Did not find item!");

    int skip = new Random().nextInt(all.size());
    Iterator<T> it = all.iterator();
    for (int i = 0; i < skip; i++) it.next();

    return it.next();
}

This removes elements from the all set if they belong to the used set. As Set.removeIf and Set.contains are being used, the removal of elements is optimal w.r.t performance. Then, a random number of elements is skipped in the resulting set, and finally, the next element of the set is returned.


Another approach is to shuffle the all list first and then simply iterate and return the first element that doesn't belong to the used set:

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {

    Set<T> used = new HashSet<>(usedItems);
    List<T> all = new ArrayList<>(allItems);

    Collections.shuffle(all);

    for (T item : all) if (!used.contains(item)) return item;

    throw new RuntimeException("Did not find item!");
}

EDIT: Checking the last snippet of code, I now realize that there's no need to shuffle the whole list. Instead, you could randomize the indices of the allItems list and return the first element that doesn't belong to the used set:

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {

    Set<T> used = new HashSet<>(usedItems);

    return new Random().ints(allItems.size(), 0, allItems.size())
        .mapToObj(allItems::get)
        .filter(item -> !used.contains(item))
        .findAny()
        .orElseThrow(() -> new RuntimeException("Did not find item!"));
}
Eugene
  • 117,005
  • 15
  • 201
  • 306
fps
  • 33,623
  • 8
  • 55
  • 110
  • Last implementation do not compile => incompatible types: bad return type in method reference T cannot be converted to int – Ari Aaltonen Dec 21 '17 at 08:19
  • @AriAaltonen Eugene has edited the answer and fixed that compilation error – fps Dec 21 '17 at 11:56
1

The Comparator you've passed into the sorted intermediate operation seems wrong and strange way to use a Comparator to my eyes anyway; which relates to what @Eugene has mentioned in his post.

Thus, I'd recommend you avoid any type of pitfalls and always use an API the way it's intended to be used; nothing more.

if you really want a random element from the said list; the only way that is possible is to find all the distinct elements of the two lists. so we cannot improve the speed in this aspect.

once this is done we simply need to generate a random integer within the range of the list containing the distinct elements and index into it given there is at least one element contained in it.

Though I have to admit there are probably better ways to accomplish the task at hand without the use of streams; here's how I have modified your code slightly to remove the misuse of .sorted((o1, o2) -> new Random().nextInt(2) - 1).

<T> T notUsedRandomItem(List<T> allItems, List<T> usedItems) {
      List<T> distinctItems =  allItems.stream()
                 .filter(item -> !usedItems.contains(item))
                 .collect(toList());

      if(distinctItems.size() == 0) throw new RuntimeException("Did not find item!");

      return distinctItems.get(new Random().nextInt(distinctItems.size()));
}
Ousmane D.
  • 54,915
  • 8
  • 91
  • 126
  • Tell me what is wrong with my comparator solution to get suffle work by using stream API. I have unit tested my solution and it works fine with the given Integer list example. – Ari Aaltonen Dec 20 '17 at 22:53
  • IntStream.range(1, 1000000).forEach(i -> { Integer result = notUsedRandomItem( Arrays.asList(1, 2, 3, 4, 5), Arrays.asList(1, 3, 4) ); if (!Arrays.asList(2,5).contains(result)) throw new RuntimeException("OMG comparator broken"); }); – Ari Aaltonen Dec 20 '17 at 22:54
  • I tried to find suffle function from steam API but I did not find any so I end up using that comparator way of suffling. But yes you are right using comparator like this is hacky solution and should be avoided. – Ari Aaltonen Dec 20 '17 at 23:25
  • @AriAaltonen For what it's worth, the stream has to iterate over every item in the list in order to sort them. If you instead get a random index in the list, your get operation will be O(1) instead of O(n). The difference is negligible in most cases, but I figure it's worth noting. – FThompson Dec 20 '17 at 23:27
  • @AriAaltonen there's not any `suffle` method in the Stream API that I know of. However, there is a [Collections.suffle](https://docs.oracle.com/javase/7/docs/api/java/util/Collections.html#shuffle(java.util.List)). – Ousmane D. Dec 20 '17 at 23:31