Fix script to remove duplicate records from table

Bart Z
Tera Contributor

Hello guys. I have over 100k duplicated CIs on the cmdb_ci table on my instance. I am using a fix script right now which works, but it is deadly slow — it deletes around 10 duplicates per minute. I posted the script below; I'm looking for recommendations on optimizing it, or even replacing it entirely.

// Find every u_persid value that appears on more than one cmdb_ci record,
// then delete the oldest record in each duplicate group.
var duplicateKeys = getDuplicates('cmdb_ci', 'u_persid');
//gs.print("duplicate key count: " + duplicateKeys.length);

var deletedNames = getOldRecord(duplicateKeys);

//gs.print("deleted CI count: " + deletedNames.length);
//gs.print("deleted CI names: " + deletedNames);
/**
 * For each duplicated key value, deletes the OLDEST cmdb_ci record carrying
 * it (ascending sort on sys_created_on) and records that record's name.
 *
 * NOTE: only one record per key is deleted per run. A key duplicated three
 * or more times will still have duplicates afterwards — rerun until
 * getDuplicates() returns an empty array.
 *
 * @param {string[]} ans - u_persid values that occur on more than one record
 * @returns {string[]} names of the CIs that were deleted
 */
function getOldRecord(ans) {
    var oldRecords = [];
    for (var i = 0; i < ans.length; i++) {
        var grCI = new GlideRecord('cmdb_ci');
        grCI.addQuery('u_persid', ans[i]);
        grCI.orderBy('sys_created_on'); // ascending: oldest record first
        grCI.setLimit(1);               // only touch the oldest duplicate
        grCI.query();
        if (grCI.next()) {
            // Skip business rules/workflows and sys-field updates on delete;
            // delete-time business rules are the usual cause of very slow
            // deletes. Remove these two lines if delete-time business rules
            // must still fire for CI cleanup.
            grCI.setWorkflow(false);
            grCI.autoSysFields(false);
            // Push the string VALUE, not the live GlideElement object —
            // pushing grCI.name stores a reference, not a snapshot.
            oldRecords.push(grCI.getValue('name'));
            grCI.deleteRecord();
        }
    }
    return oldRecords;
}

/**
 * Returns the values of a column that occur on more than one record of a
 * table (NULL values are excluded from consideration).
 *
 * @param {string} tablename - table to scan for duplicates
 * @param {string} val - column whose duplicated values are wanted
 * @returns {string[]} the duplicated column values, as strings
 */
function getDuplicates(tablename, val) {
    var agg = new GlideAggregate(tablename);
    //agg.addQuery('active','true');
    agg.addAggregate('COUNT', val);
    agg.addNotNullQuery(val);
    agg.groupBy(val);
    agg.addHaving('COUNT', '>', 1);
    agg.query();

    var duplicateValues = [];
    while (agg.next()) {
        duplicateValues.push(agg[val].toString());
    }
    return duplicateValues;
}
3 REPLIES 3

Ankur Bawiskar
Tera Patron
Tera Patron

Hi,

Did you check whether you have any business rules that run before/after delete? Those can slow down delete performance considerably.

If you wish to skip the business rules, you can call setWorkflow(false) on the GlideRecord object to avoid them.

Regards
Ankur

Regards,
Ankur
Certified Technical Architect  ||  9x ServiceNow MVP  ||  ServiceNow Community Leader

I don't see anything related

DrewW
Mega Sage

OK, so here are just some thoughts; number 1 will probably help the most if it's not already in place.

1 - Create an index on the u_persid column.  That will allow you to query much faster

2 - Limit the number of records you process at a time. Pushing 100k records into an array will gradually slow things down. Also, if you break the work up into batches of, say, 100 duplicate records at a time, you will not have to worry about the script crashing after running for 24 hours.

3 - Do you really need the dupRecords returned?  If you are just hoping for a count then just setup a count var and increment it.

4 - You can use setWorkflow(false) like Ankur suggested to speed things up.

5 - Change the code to use delete multiple to remove steps, but the savings here is probably not going to be much

// Illustrative snippet only — `ans[i]` is defined inside the for-loop of the
// original script, so this code must run within that same loop.
var grCI = new GlideRecord('cmdb_ci');
grCI.addQuery('u_persid', ans[i]);
grCI.orderBy('sys_created_on'); // oldest record first
grCI.setLimit(1);
// NOTE(review): deleteMultiple() may not honor setLimit() on all releases —
// verify on your instance first, or this could delete EVERY record matching
// the query (including the copy you meant to keep), not just the oldest one.
grCI.query();
grCI.deleteMultiple(); // deletes server-side without fetching each row