4

In interview for iOS dev position, they asked me about fastest solution to find deleted items in large array ( > 1 million items), i.e: on local, app had had array from server ( we don't care about method to get this array from server). Then, on server, they deleted several items in this array & on local, we didn't know. So, how can we find the deleted items in local, base on new array on server & local array? I think in 02 days later & find this loop below but it just be ok when server deleted items at the beginning or the end of array. When deleted items in the middle of array, this loop is take very long time to complete.

#define rchar (rand() % ('z'-'a') + 'a')

- (void)viewDidLoad :

NSMutableArray * mar = [NSMutableArray new]; // represent server array. 



for (int i = 0; i<50000; i++) //10 50000 200000 400000 600000 800000
{
    NSString * str = [NSString stringWithFormat:@"%c%c%c%c",rchar, rchar, rchar, rchar];
    [mar addObject:str];

}

NSMutableArray *tmpServerArr = [[NSMutableArray alloc] initWithArray:mar copyItems:YES];

NSMutableArray *localArr = [[NSMutableArray alloc]initWithCapacity:50002]; // represent local array
for (int i = 0; i<50002; i++) //10 50000 200000
{
    switch (i) {
        case 32111: [localArr addObject:[NSString stringWithFormat:@"%c%c%c%c",rchar, rchar, rchar, rchar]];

            break;
            case 41234: [localArr addObject:[NSString stringWithFormat:@"%c%c%c%c",rchar, rchar, rchar, rchar]];
            break;
        case 50000: [localArr addObject:mar[32111]];
            break;
        case 50001: [localArr addObject:mar[41234]];
            break;

        default: [localArr addObject:mar[i]];
            break;
    }

}

NSUInteger localremainItem = [localArr count];

NSDate *start;

NSUInteger  loopWhile = 0;
 while (localremainItem > 0 ) {
// while ([localArr count] > 0 ) {


     NSUInteger idxItemStr = (localremainItem -1 - pow(-1, loopWhile)*(localremainItem-1))/2; // -1;
     NSLog(@"idxstring: %li -- pow: %f", idxItemStr, pow(-1, loopWhile));
     NSString *itemStr = localArr[idxItemStr];
      NSLog(@"item checked : %@]", itemStr);
      NSPredicate *pre = [ NSPredicate predicateWithFormat:@"NOT (SELF like %@)", itemStr];
     NSCountedSet * localCountedSet = [[ NSCountedSet alloc] initWithArray:localArr];
     NSCountedSet * serverCountedSet = [[ NSCountedSet alloc] initWithArray:tmpServerArr];

     if ([localCountedSet countForObject:itemStr] > [serverCountedSet countForObject:itemStr]) {
         NSLog(@"this item %@ was deleted in server.", itemStr);

     }
     //else {
         [localArr filterUsingPredicate: pre];
         [tmpServerArr filterUsingPredicate:pre];
         NSLog(@"localremain: %li", [localArr count]);
         NSLog(@"tmpserverremain: %li", [tmpServerArr count]);
    // }

     if ([localArr count] == [tmpServerArr count]) {
         localremainItem = 0;
     }
     else localremainItem = [localArr count];

      loopWhile +=1;
 }

NSLog(@"now server arr is controled! time: %f", -[start timeIntervalSinceNow]);

Thank you for your help.
** Update Nov 14, 2015: this loop below is 8 times faster:
in viewDidLoad:

while ([localArr count] >0) {

    NSUInteger maxRange =0;
if ([localArr count] >500) {
    maxRange = 500;
}else maxRange = [localArr count];


    NSArray *sampleTest = [[NSArray alloc] initWithArray:[localArr subarrayWithRange:NSMakeRange(0,maxRange)]];
    NSLog(@"sampleTest cnt: %lu", (unsigned long)[sampleTest count]);

if ([self countSampleTest:localArr serverArr:tmpServerArr sampleTest:sampleTest]) {

    NSPredicate *pre = [ NSPredicate predicateWithFormat:@"NOT (SELF in %@)", sampleTest];
    [localArr filterUsingPredicate: pre];
    [tmpServerArr filterUsingPredicate:pre];
    NSLog(@"localremain: %li", [localArr count]);
    NSLog(@"tmpserverremain: %li", [tmpServerArr count]);
     }
else {
    NSLog(@"deleted item in here!: %i, %li", 0, [sampleTest count]); //[localArr count]/10);
    int found = 0;
    while (found <10) {
        maxRange =0;
        if ([localArr count] >50) {
            maxRange = 50;
        }else maxRange = [localArr count];


        NSArray *sampleTest1 = [[NSArray alloc] initWithArray:[localArr subarrayWithRange:NSMakeRange(0,maxRange)]];
        NSLog(@"sampleTest1 cnt: %lu", (unsigned long)[sampleTest1 count]);
        if ([self countSampleTest:localArr serverArr:tmpServerArr sampleTest:sampleTest1]) {


            NSPredicate *pre = [ NSPredicate predicateWithFormat:@"NOT (SELF in %@)", sampleTest1];
            [localArr filterUsingPredicate: pre];
            [tmpServerArr filterUsingPredicate:pre];
            NSLog(@"localremain: %li", [localArr count]);
            NSLog(@"tmpserverremain: %li", [tmpServerArr count]);
        }
        else {
            NSUInteger k = 0;
            while (k< [sampleTest1 count]) {

                NSLog(@"item checked %lu : %@]", (unsigned long)k, sampleTest1[k]);
                NSCountedSet * localCountedSet = [[ NSCountedSet alloc] initWithArray:localArr];
                NSCountedSet * serverCountedSet = [[ NSCountedSet alloc] initWithArray:tmpServerArr];

                if ([localCountedSet countForObject:sampleTest1[k]] > [serverCountedSet countForObject:sampleTest1[k]]) {
                    NSLog(@"this item %@ was deleted in server.", sampleTest1[k]);
                }
                NSPredicate *pre = [ NSPredicate predicateWithFormat:@"NOT (SELF like %@)", sampleTest1[k]];
                [localArr filterUsingPredicate: pre];
                [tmpServerArr filterUsingPredicate:pre];
                NSLog(@"localremain: %li", [localArr count]);
                NSLog(@"tmpserverremain: %li", [tmpServerArr count]);
                if ([localArr count] == [tmpServerArr count]) {
                    k = [sampleTest1 count];
                    found = 10;
                }else k +=1;
            }
        }
        found +=1;
    }
}


     if ([localArr count] == [tmpServerArr count]) {
         localArr = nil;
     }
    // if ([sampleTest count] == 0) {
    //     localArr = nil;
    // }

}

NSLog(@"this stuck is done!");//end viewDidLoad.  
-(BOOL)countSampleTest: (NSMutableArray *)localArr serverArr:(NSMutableArray *)tmpServerArr sampleTest:(NSArray *)sampleTest{
BOOL tf;
int cntSampleLoc = 0;
int cntSampleServer = 0;

for (int i = 0 ; i < [sampleTest count]; i++) {
    NSCountedSet * localCountedSet = [[ NSCountedSet alloc] initWithArray:localArr];
    NSCountedSet * serverCountedSet = [[ NSCountedSet alloc] initWithArray:tmpServerArr];
    cntSampleLoc += [localCountedSet countForObject:sampleTest[i]];
    //if (localArr[i] in tmpServerArr ) {
    cntSampleServer +=[serverCountedSet countForObject:sampleTest[i]];
    // }
}

if (cntSampleServer == cntSampleLoc) {
    tf= true;
}else tf = false;
return tf;}
Nik
  • 176
  • 1
  • 9
  • Personally I'd tell them this is a horrible implementation :) It seems much better to pass along the deltas than the entire array or to search thru the arrays. – Mike M Nov 12 '15 at 18:20
  • @MikeM : they are boss, I can't tell them like that. Many thank. – Nik Nov 12 '15 at 19:40
  • @Nik, you definitely *can* and *should* tell someone that an implementation is bad if you can back it up, provide an option for making it better and present your findings in a professional manner. Nobody likes being told that they're wrong, but I'm sure a lot of people will appreciate having a better tool in the end. – James Webster Feb 08 '16 at 09:28

4 Answers4

2

First solution that comes to my mind is quite easy:

NSArray *serverArray;
NSMutableArray *localArray;

// This will give you the difference.
[localArray removeObjectsInArray:serverArray];

Docs say:

removeObjectsInArray:

This method is similar to removeObject:, but it allows you to efficiently remove large sets of objects with a single operation.

Rafał Sroka
  • 39,540
  • 23
  • 113
  • 143
  • Easy is not the fastest. The question is to find the fastest solution. – rmaddy Nov 12 '15 at 15:27
  • @Rafal Sroka: I read comment of user1032657 at [link](http://stackoverflow.com/questions/111866/best-way-to-remove-from-nsmutablearray-while-iterating) , so maybe i will check your solution later. Many tks. – Nik Nov 12 '15 at 16:26
  • @Nik, definitely, check other solutions and update us with your findings. It's a very interesting question. – Rafał Sroka Nov 12 '15 at 16:27
1

You could also us NSSet and NSMutableSet which will be faster for large datasets:

NSMutableSet *serverSet = [NSMutableSet setWithArray: serverArray];
NSSet *localSet = [NSSet setWithArray: localArray];

[serverSet minusSet: localSet];

NSArray *deletedObjects = [serverSet allObjects];

See also this SO question for more on the performance differences between NSArray and NSSet.

Community
  • 1
  • 1
tilo
  • 14,009
  • 6
  • 68
  • 85
  • The glitch with this solution is that converting set to array removes duplicates. Set classes do not allow duplicates. – Rafał Sroka Nov 12 '15 at 16:29
  • True. But this shouldn't be an issue when searching for removed items. Btw: your answer using `removeObjectsInArray:` would also remove duplicates. – tilo Nov 12 '15 at 16:40
  • @tilo: I tested your suggestion, it works fine when server deletes item which isn't duplicate. Many tks. – Nik Nov 12 '15 at 19:37
0

Without resorting to writing an algorithm:

Keeping your arrays sorted is going to help.

Imagine looking for the word "mouse" in a dictionary. If you start at "a" and just check if every word is "mouse", it'll take you forever. But knowing that "m" words are gonna be in the middle-ish, and that "o" is in the middle-ish of the "m"s means you can find your word much faster.

In an interview, this has the added benefit of showing that you know how to think outside the scope of the question as written and gives you a chance to show off about other things you know.

James Webster
  • 31,873
  • 11
  • 70
  • 114
  • if we sort array, we take a lot of time for sort method? – Nik Nov 12 '15 at 16:27
  • If you sort your array every time (and depending on the algorithm you use), **yes**. If you insert new elements in the right place every time, then both entries and searches (and subsequent deletions) can be done in `O(log n)` time. – James Webster Nov 12 '15 at 16:32
0
  1. Create a Hashmap (Dictionary) of both local and server arrays.
  2. Loop through local array and see if item exists in server array.

O(N + N + N) = O(3N) = 0(N)

Chris Fremgen
  • 4,649
  • 1
  • 26
  • 26