1

A bug in the google drive app resulted in a user accidentally orphaning thousands of documents that were in a shared folder.

By looking at the Admin console, I'm able to get the IDs of those documents, but not the IDs of the folders in question (I get the names of the folders, but folder names are not unique and I'm dealing with thousands of documents and hundreds of folders)

If any given user searches for the orphaned document, they can find the document and, by clicking on it, see in the "Activity" pane the information about when the document was removed from its home and where it was removed from.

I'm thinking my best crack at scripting a solution to this is:

For each google document ID...
   -> Access the activity history and grab the "removed from folder" event
   -> Get the FolderID from that event (surely that exists and there's API access, right?)
   -> Reparent the folder

The question is how to get the API access I need to the FolderID.

Note that in the meantime, I have the following code which is sometimes but not always able to get me the parent folders (it seems to fail often enough as to be useless for my overall problem)

function recoverFile (id) {
  Logger.log('recoverFile called with id='+id);
  try {
    f = DriveApp.getFileById(id);
  }
  catch (exc) {
    Logger.log('Error getting file '+id+'- no permission?');
    return false
  }
  Logger.log('Successfully got file: '+f);  
  parents = f.getParents()
  if (parents.hasNext()) {
    p = parents.next();  
  }
  else {
    Logger.log('Document has no parents');
    return false
  }
  all_parents = [p]
  Logger.log('We have parents: '+p);
  while (p) {
    p.addFile(f)
    all_parents.push(p)
    Logger.log(p);
    if (parents.hasNext()) {
      p = parents.next()
    }
    else {
      p = false
    }

  }  
  return {'file':f, 'parents':all_parents}
} 

Note that the other problem with this code is that it slows down enormously after a few runs -- the try/catch on the getId() is killing me, but I don't know who owns each file, so I have to try to find out.

Tom Hinkle
  • 61
  • 7
  • curious about the bug that bit you: by any chance did the user configure the drive sync client to only sync certain folders (not the default all folders) ? – Zig Mandel Sep 17 '15 at 03:34
  • Zig -- no, the reverse. We have some massive shared folders, so we try to avoid users having it set to all folders because it will fill up their hard drive. This student had it set to sync all folders. – Tom Hinkle Sep 17 '15 at 03:37
  • Zig -- I'm getting much closer to a fix now, if it should ever bite you. The key has been going to the Admin Reporting API which can give you details about what has happened (this may well not have existed in this form years ago though) – Tom Hinkle Sep 17 '15 at 04:13
  • indeed that report didnt exist back then or didnt display changes done by api (as oposed to using the webapp) – Zig Mandel Sep 17 '15 at 06:58

1 Answers1

1

The Admin Reports API is the key to an actual solution: https://developers.google.com/admin-sdk/reports/v1/reference/activity-ref-appendix-a/drive-event-names

Using that API, it is possible to pull out data on folders that have been orphaned. Once you have that data, it's relatively straightforward to make a script that reads doc IDs and folder IDs and puts the docs back in the folder, though that script has to be run per-creator of the documents in question.

Here's a snipped of the script that grabs the data out of the API:

    function generateLoginActivityReport() {
        var now = new Date();
        // Assumes a one week time horizon -- modify as you see fit...
        var oneWeekAgo = new Date(now.getTime() - 8 * 24 * 60 * 60 * 1000);
        var startTime = oneWeekAgo.toISOString();  
        var endTime = now.toISOString();

        var rows = [];
        var pageToken, page;
        do {
            /*
            page = AdminReports.Activities.list('all', 'login', {
                startTime: startTime,
                endTime: endTime,
                maxResults: 500,
                pageToken: pageToken
            });
            */
            page = AdminReports.Activities.list('name-of-user-who-accidentally-borked-drive@innovationcharter.org','drive', {
                startTime:startTime,
                endTime:endTime,
                maxResults: 200,
                eventName: 'remove_from_folder',
                //name: 'remove_from_folder',
                pageToken: pageToken});

            var items = page.items;
            if (items) {
                for (var i = 0; i < items.length; i++) {
                //for (var i = 0; i < 2; i++) {
                    var item = items[i];
                    // Get the parameters out of the mess of crap that is their parameters...
                    // Google serves them up this way [{} {} {} {}] instead of in a normal
                    // object like we're going to create in params
                    params = {}
                    for (var idx=0; idx < item.events[0].parameters.length; idx++) {
                        param = item.events[0].parameters[idx]
                        var val = false
                        if (param['multiValue']) {
                            var val = param['multiValue']
                            }
                        if (param['value']) {
                            var val = param['value']
                            }
                        if (param['boolValue']) {
                            var val = param['boolValue']
                        }
                        params[param['name']] = val
                    }
                    // Now go ahead and push a row to the spreadsheet with all
                    // the data we need
                    var row = [
                        new Date(item.id.time),
                        item.actor.email,
                        item.events[0].name,
                        params['doc_title'],
                        params['doc_id'],
                        params['source_folder_title'],
                        params['source_folder_id'],
                        params['owner'], // You'll need the owner because the
                                                         // owner has to recover the file
                    ];
                    rows.push(row);
                }
            }
            pageToken = page.nextPageToken;
        } while (pageToken);

        if (rows.length > 0) {
            var sheet = SpreadsheetApp.getActiveSpreadsheet().insertSheet('Files Removed from Folder');
            // Append the headers.
            var headers = ['Time', 'User', 'Event','Doc Title','Doc','Folder Title','Folder','Owner'];
            sheet.appendRow(headers);
            // Append the results.
            sheet.getRange(2, 1, rows.length, headers.length).setValues(rows);
            //Logger.log('Report spreadsheet created: %s', spreadsheet.getUrl());
        } else {
            Logger.log('No results returned.');
        }
    }

That lovely script generates a spreadsheet with the data we need to use to pull Data. Here's part 2 of the program -- a script to go through that data row by row and recover the files we can recover...

    function recoverSheet () {

        DOCID = 5; // 1-INDEXED COL HEADERS
        FOLDERID = 7;
        OWNER = 8;
        RECOVERED = 9;
        DOC_LINK = 10;
        FOLDER_LINK = 11;
        EXTRA_PARENTS = 12;

        sheet = SpreadsheetApp.getActiveSheet()
        total_range = sheet.getDataRange();
        for (var rownum=2; rownum<=total_range.getLastRow(); rownum++) {
            // Iterate through each row...
            var range = sheet.getRange(
                'A'+rownum+':'+'I'+rownum // Where your data is...
            )
            docID=range.getCell(1,DOCID).getValue();
            if (range.getCell(1,RECOVERED).getValue()) {
                Logger.log('Skipping '+docID+' - already recovered');
            }
            else {
                if (range.getCell(1,OWNER).getValue()==currentUser) {
                    folderID = range.getCell(1,FOLDERID).getValue();
                    try {
                        folder = DriveApp.getFolderById(folderID); // get folder...
                        file = DriveApp.getFileById(docID);
                    }
                    catch (exc) {
                        Logger.log('Failed to get file or folder :( for docID '+docID+' folderID '+folderID);
                        range.getCell(1,RECOVERED).setValue('FAILED');
                        file = false
                    }
                    if (file) {
                        folder.addFile(file);
                        // Add nice little links to make it handy to see what we recovered
                        sheet.getRange(rownum,RECOVERED,1,1).setValue('True'); // Set Recovered
                        sheet.getRange(rownum,DOC_LINK,1,1).setValue('https://drive.google.com/open?id='+docID);
                        sheet.getRange(rownum,FOLDER_LINK,1,1).setValue('https://drive.google.com/open?id='+folderID);
                        recovery = recoverFile(docID) // Do any extra recovery we can...
                        if (recovery) {
                            Logger.log('Successful extra recovery: '+recovery);      
                        //range.getCell(1,4).setValue(recovery.file.getOwner().getEmail()); // set Owner of document                         
                            for (var i=0; i<recovery.parents.length; i++) {              
                                sheet.getRange(rownum,EXTRA_PARENTS+i).setValue('https://drive.google.com/open?id='+recovery.parents[i].getId());
                            }
                        } 
                    }
                }}
        }
    }
Tom Hinkle
  • 61
  • 7