Instance Memory Issues with Scheduled Job

JayGervais
Kilo Sage

Hi there,

 

I have been working on a scheduled job that imports Chromebooks from the Google API into the cmdb_ci_computer table. It mostly works, but every time it runs it keeps consuming memory until our instance runs out and crashes. I have tried numerous ways to improve this, such as having the script create an async import set, fill it with data, and then process the transform. That resulted in the job half-completing, the node crashing, and everything restarting each time.

I have also tried building an array of all the data and processing it at once, which crashed the system before anything was even added to an import set. Several other scripting approaches ended the same way. There are only ~50k records, which doesn't seem like that much, so I am unsure what else I can do to improve this.

 

This is my current script. My approach here was to batch the inserts and clear the variables after each iteration so they don't build up in memory. I would love any insight or suggestions that might make this work.

 

try {
    getMachines(null);
} catch (ex) {
    var message = ex.message;
    gs.info('google import ERROR: ' + message);
}

function getMachines(endpoint) {

    var fieldsToImport = [
        'serialNumber',
        'annotatedUser',
        'bootMode',
        'deviceId',
        'etag',
        'firmwareVersion',
        'lastEnrollmentTime',
        'lastSync',
        'macAddress',
        'model',
        'orgUnitId',
        'orgUnitPath',
        'osVersion',
        'platformVersion',
        'status',
        'ipAddress'
    ];

    var pagedR = new sn_ws.RESTMessageV2('Google Device API', 'Default GET'); // REST Message name
    if (endpoint !== null) {
        pagedR.setEndpoint(endpoint);
    }
    var pagedResponse = pagedR.execute();
    var pagedResponseBody = pagedResponse.getBody();
    var pagedObj = JSON.parse(pagedResponseBody);
    var chromeDeviceRaw = pagedObj.chromeosdevices;

    chromeDeviceRaw.sort(function (a, b) {
        return a.lastEnrollmentTime.localeCompare(b.lastEnrollmentTime);
    });

    var chromeDeviceArr = removeDuplicates(chromeDeviceRaw, "serialNumber");

    var devices = [];

    for (var j = 0; j < chromeDeviceArr.length; j++) {
        var chromeDevice = {};
        for (var i = 0; i < fieldsToImport.length; i++) {
            if (fieldsToImport[i] == 'ipAddress') {
                if (chromeDeviceArr[j]['lastKnownNetwork']) {
                    chromeDevice['ipAddress'] = chromeDeviceArr[j]['lastKnownNetwork'][0]['ipAddress'];
                }
            } else {
                chromeDevice[fieldsToImport[i]] = chromeDeviceArr[j][fieldsToImport[i]];
            }
        }
        devices.push(chromeDevice);
    }

    batchInsert(devices);

    if (pagedObj.nextPageToken) {
        var apiURL = pagedR.getEndpoint();
        // Strip any existing pageToken before appending the next one
        var nextPageEndpoint = apiURL;
        if (apiURL.indexOf('&pageToken=') > -1) {
            nextPageEndpoint = apiURL.substring(0, apiURL.indexOf('&pageToken='));
        }
        var nextPageURL = nextPageEndpoint + '&pageToken=' + pagedObj.nextPageToken;
        getMachines(nextPageURL);
    }

    pagedR = null;
    pagedResponse = null;
    pagedResponseBody = null;
    pagedObj = null;
    chromeDeviceRaw = null;
    chromeDeviceArr = null;
    devices = [];
}

function batchInsert(devices) {
    var batchInsertSize = 10; // Example batch insert size
    for (var v = 0; v < devices.length; v += batchInsertSize) {
        var batch = devices.slice(v, v + batchInsertSize);
        var importRow = new GlideRecord('u_google_chrome_device_import'); // renamed so it doesn't shadow the batchInsert function
        for (var g = 0; g < batch.length; g++) {
            var device = batch[g];
            if (device) {
                importRow.initialize();
                for (var key in device) {
                    var field = 'u_' + key.toLowerCase();
                    var value = device[key];
                    importRow.setValue(field, value);
                    gs.addInfoMessage(field + ' - ' + value);
                }
                importRow.insert();
            }
        }
            }
        }
    }
}

function removeDuplicates(originalArray, prop) {
    var newArray = [];
    var lookupObject = {};
    for (var i in originalArray) {
        lookupObject[originalArray[i][prop]] = originalArray[i];
    }
    for (i in lookupObject) {
        newArray.push(lookupObject[i]);
    }
    return newArray;
}
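
For reference, an iterative version of the same pagination would look roughly like this (an untested sketch; buildDevices() here is just a placeholder for the field-mapping loop inside getMachines(), pulled out into its own function):

function getAllMachines() {
    var nextEndpoint = null;
    do {
        var request = new sn_ws.RESTMessageV2('Google Device API', 'Default GET');
        if (nextEndpoint !== null) {
            request.setEndpoint(nextEndpoint);
        }
        var body = JSON.parse(request.execute().getBody());

        // Map and insert this page, then let everything fall out of scope
        batchInsert(buildDevices(body.chromeosdevices || []));

        nextEndpoint = null;
        if (body.nextPageToken) {
            var apiURL = request.getEndpoint();
            var base = apiURL.indexOf('&pageToken=') > -1 ?
                apiURL.substring(0, apiURL.indexOf('&pageToken=')) : apiURL;
            nextEndpoint = base + '&pageToken=' + body.nextPageToken;
        }
        // No recursive frames pile up here; each iteration releases its page before the next request
    } while (nextEndpoint !== null);
}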
3 Replies

James Chun
Kilo Patron

Hi @JayGervais,

 

Is it possible to add pagination to the REST API?

Do you have any scripts running on the Transform Map? If so, can you share them as well?

 

Just trying to understand where all the resources are going: is it during the data import, the data transformation, or both?

 

Cheers

Hi Kilo, 

The script is already paginated, which is why I am looping through the function in my scheduled job. I have all the transform scripts disabled to rule that part out, so it is just populating a few fields.

One issue I have is that the objects from Google are massive, but I can't use the reduced version because it doesn't include everything we need.
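
One thing I may try, assuming this endpoint supports the standard Google partial-response fields parameter and maxResults (I haven't confirmed that yet), is trimming each page down to just the fields I actually map, roughly like this:

var request = new sn_ws.RESTMessageV2('Google Device API', 'Default GET');
// Hypothetical endpoint tweak: keep the full projection but only return the mapped fields,
// and cap the page size. Parameter support would need to be confirmed against the API docs.
request.setEndpoint(request.getEndpoint() +
    '&maxResults=100' +
    '&fields=nextPageToken,chromeosdevices(serialNumber,annotatedUser,bootMode,deviceId,' +
    'etag,firmwareVersion,lastEnrollmentTime,lastSync,macAddress,model,orgUnitId,' +
    'orgUnitPath,osVersion,platformVersion,status,lastKnownNetwork/ipAddress)');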

Cheers

SteveMacWWT
Kilo Sage

@JayGervais Try taking a look at this article from SNProTips about using 'Event Driven Recursion' to resolve the issue:

 

https://snprotips.com/blog/2018/10/11/how-to-do-massive-slow-database-operations-without-slowing-dow...
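
The core idea is to process one page per event and then queue the next page as a fresh event, so each chunk runs in its own job with its own memory. A minimal sketch, assuming an event (the name below is a placeholder) registered in the Event Registry with a Script Action attached, and with the mapping/insert helpers from your script moved into a Script Include:

// Scheduled job: just fire the first page
gs.eventQueue('google.chromebook.import.page', null, '', '');

// Script Action on google.chromebook.import.page (event.parm1 carries the pageToken):
var token = event.parm1;
var request = new sn_ws.RESTMessageV2('Google Device API', 'Default GET');
if (token) {
    request.setEndpoint(request.getEndpoint() + '&pageToken=' + token);
}
var body = JSON.parse(request.execute().getBody());

// buildDevices()/batchInsert() stand in for the mapping and insert logic from the
// original script, relocated to a Script Include so the Script Action can call them.
batchInsert(buildDevices(body.chromeosdevices || []));

// Queue the next page as a new event instead of recursing, so this run ends
// and releases its memory before the next page is touched.
if (body.nextPageToken) {
    gs.eventQueue('google.chromebook.import.page', null, body.nextPageToken, '');
}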