Fix script to remove duplicate records from table

Bart Z
Tera Contributor

Hello guys. I have over 100k duplicate CIs on the cmdb_ci table on my instance. I am using a fix script right now which works, but it is painfully slow - it deletes around 10 duplicates per minute. I have posted the script below - I'm looking for recommendations on optimizing it or even replacing it entirely.

var ans = getDuplicates('cmdb_ci', 'u_persid');
//gs.print("ans:"+ans.length);

var resultOldRecords = getOldRecord(ans);

//gs.print("OLD CI count: "+resultOldRecords.length);
//gs.print("OLD CI names: "+resultOldRecords);
function getOldRecord(ans) {

    var oldRecords = [];
    for (var i = 0; i < ans.length; i++) {
        var grCI = new GlideRecord('cmdb_ci');
        grCI.addQuery('u_persid', ans[i]);
        grCI.orderBy('sys_created_on'); // oldest record first
        grCI.setLimit(1);
        grCI.query();
        if (grCI.next()) {
            oldRecords.push(grCI.getValue('name')); // push the CI name as a string
            grCI.deleteRecord(); // delete the oldest duplicate for this u_persid
            //gs.print(grCI.sys_id);
        }

    }
    return oldRecords;

}

function getDuplicates(tablename, val) {
    var dupRecords = [];
    var gaDupCheck = new GlideAggregate(tablename);
    //gaDupCheck.addQuery('active','true');
    gaDupCheck.addAggregate('COUNT', val);
    gaDupCheck.addNotNullQuery(val);
    gaDupCheck.groupBy(val);
    gaDupCheck.addHaving('COUNT', '>', 1);
    gaDupCheck.query();
    while (gaDupCheck.next()) {
        dupRecords.push(gaDupCheck[val].toString());
    }
    return dupRecords;
}

Ankur Bawiskar
Tera Patron

Hi,

Did you check whether you have any business rules that run before/after delete and slow down the performance?

If you wish to exclude the BRs, you can call setWorkflow(false) on the GlideRecord object to skip them.
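
For example, a minimal sketch of how that could look inside your existing delete loop - same query as in your script, with only setWorkflow(false) added before the delete:

var grCI = new GlideRecord('cmdb_ci');
grCI.addQuery('u_persid', ans[i]);
grCI.orderBy('sys_created_on');
grCI.setLimit(1);
grCI.query();
if (grCI.next()) {
    grCI.setWorkflow(false); // suppress business rules, workflows and notifications for this delete
    grCI.deleteRecord();
}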

Regards,
Ankur
✨ Certified Technical Architect  ||  ✨ 9x ServiceNow MVP  ||  ✨ ServiceNow Community Leader

I don't see anything related

DrewW
Mega Sage

OK, so here are just some thoughts; number 1 will probably help the most if it's not already in place.

1 - Create an index on the u_persid column. That will allow you to query much faster.

2 - Limit the number of records you process at a time. Pushing 100k records into an array over time is going to slow things down. Also, if you break it up to consider, say, 100 duplicate records at a time, you will not have to worry about the script crashing after 24 hours (see the combined sketch below).

3 - Do you really need dupRecords returned? If you are just after a count, then set up a count variable and increment it.

4 - You can use setWorkflow(false), as Ankur suggested, to speed things up.

5 - Change the code to use deleteMultiple() to remove steps, but the savings here are probably not going to be much:

var grCI = new GlideRecord('cmdb_ci');
grCI.addQuery('u_persid', ans[i]);
grCI.orderBy('sys_created_on');
grCI.setLimit(1);
grCI.query();
grCI.deleteMultiple();
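
Putting points 2 through 5 together, a rough, untested sketch could look like this. It reuses the getDuplicates() function from the original post, the 100-group batch size is arbitrary, and it assumes setLimit(1) is honored by deleteMultiple() on your instance - worth verifying on a sub-prod copy first:

// Rough sketch combining points 2-5 (untested); reuses getDuplicates() from the original script
var BATCH_SIZE = 100; // arbitrary batch size - tune for your instance

var dupValues = getDuplicates('cmdb_ci', 'u_persid');
var groupsProcessed = 0;

for (var i = 0; i < dupValues.length && i < BATCH_SIZE; i++) {
    var grCI = new GlideRecord('cmdb_ci');
    grCI.addQuery('u_persid', dupValues[i]);
    grCI.orderBy('sys_created_on');   // oldest record first, same as the original script
    grCI.setLimit(1);                 // only target the oldest duplicate in the group
    grCI.setWorkflow(false);          // point 4: skip business rules and notifications
    grCI.query();
    grCI.deleteMultiple();            // point 5: delete without next()/deleteRecord()
    groupsProcessed++;                // point 3: count groups instead of pushing names into an array
}

gs.print('Processed ' + groupsProcessed + ' duplicate groups this run; rerun for the next batch.');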