Fix script to remove duplicate records from table
11-23-2021 06:07 AM
Hello guys. I have over 100k duplicated CIs in the cmdb_ci table on my instance. I am using a fix script right now that works, but it is deadly slow - it deletes around 10 duplicates per minute. I posted the script below - I'm looking for recommendations on optimizing it, or even a completely different approach.
var ans = getDuplicates('cmdb_ci', 'u_persid');
//gs.print("ans: " + ans.length);
var resultOldRecords = getOldRecord(ans);
//gs.print("OLD CI count: " + resultOldRecords.length);
//gs.print("OLD CI names: " + resultOldRecords);

// For each duplicated u_persid value, delete the oldest CI and collect its name.
function getOldRecord(ans) {
    var oldRecords = [];
    for (var i = 0; i < ans.length; i++) {
        var grCI = new GlideRecord('cmdb_ci');
        grCI.addQuery('u_persid', ans[i]);
        grCI.orderBy('sys_created_on'); // oldest record first
        grCI.setLimit(1);
        grCI.query();
        if (grCI.next()) {
            oldRecords.push(grCI.getValue('name')); // pushing name of CI
            grCI.deleteRecord();
            //gs.print(grCI.sys_id);
        }
    }
    return oldRecords;
}

// Return the values of 'val' that occur on more than one record in 'tablename'.
function getDuplicates(tablename, val) {
    var dupRecords = [];
    var gaDupCheck = new GlideAggregate(tablename);
    //gaDupCheck.addQuery('active', 'true');
    gaDupCheck.addAggregate('COUNT', val);
    gaDupCheck.addNotNullQuery(val);
    gaDupCheck.groupBy(val);
    gaDupCheck.addHaving('COUNT', '>', 1);
    gaDupCheck.query();
    while (gaDupCheck.next()) {
        dupRecords.push(gaDupCheck[val].toString());
    }
    return dupRecords;
}
Labels: Scripting and Coding
11-23-2021 06:14 AM
Hi,
Did you check whether you have any business rules that run before/after delete and slow down the performance?
If you wish to exclude the BRs, you can use setWorkflow(false) on the GlideRecord object to avoid those.
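For example, something like this in your delete loop (untested, adapted from your script):

var grCI = new GlideRecord('cmdb_ci');
grCI.addQuery('u_persid', ans[i]);
grCI.orderBy('sys_created_on');
grCI.setLimit(1);
grCI.query();
if (grCI.next()) {
    grCI.setWorkflow(false); // skip business rules and related engines for this delete
    grCI.deleteRecord();
}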
Regards
Ankur
✨ Certified Technical Architect || ✨ 9x ServiceNow MVP || ✨ ServiceNow Community Leader
11-23-2021 06:34 AM
I don't see anything related.

11-23-2021 06:57 AM
OK, here are some thoughts; number 1 will probably help the most if it's not already in place.
1 - Create an index on the u_persid column. That will allow you to query much faster.
2 - Limit the number of records you process per run. Pushing 100k records into an array over time is going to slow things down, and if you break it up to, say, 100 duplicate groups at a time, you will not have to worry about the script crashing after 24 hours (see the sketch after the code at the end of this post).
3 - Do you really need the dupRecords array returned? If you are just after a count, set up a counter variable and increment it.
4 - You can use setWorkflow(false), as Ankur suggested, to speed things up.
5 - Change the code to use deleteMultiple() to remove steps, although the savings here will probably be small:
var grCI = new GlideRecord('cmdb_ci');
grCI.addQuery('u_persid', ans[i]);
grCI.orderBy('sys_created_on');
grCI.setLimit(1);
grCI.query();
grCI.deleteMultiple(); // note: verify that deleteMultiple() respects setLimit() on your instance before running this on real data
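Putting points 2 and 4 together, here is a rough, untested sketch that reuses your getDuplicates() function, keeps the newest record in each duplicate group, and deletes the older ones in limited batches. Adjust the batch size, and check that keeping the newest record is actually what you want:

// Untested sketch: process a limited number of duplicate groups per run,
// keep the newest record in each group, and delete the rest with business rules skipped.
var BATCH_SIZE = 100; // number of duplicate groups to handle per run
var dupValues = getDuplicates('cmdb_ci', 'u_persid');
var deleted = 0;
for (var i = 0; i < dupValues.length && i < BATCH_SIZE; i++) {
    var grCI = new GlideRecord('cmdb_ci');
    grCI.addQuery('u_persid', dupValues[i]);
    grCI.orderByDesc('sys_created_on'); // newest first
    grCI.setWorkflow(false); // skip business rules/engines on the deletes below
    grCI.query();
    if (grCI.next()) { // keep the newest record of the group
        while (grCI.next()) {
            grCI.deleteRecord(); // delete the older duplicates
            deleted++;
        }
    }
}
gs.print('Deleted ' + deleted + ' duplicate CIs in this batch');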