ray33
Tera Contributor

In the quest for efficiency on partition table queries, I made a discovery which might be worth replicating and expanding into a partition table query feature in backend java code

Unions are _slow_ and avoiding them is essential for anything that needs to be efficient when partitioned tables have any significant amount of data.

There are several ways servicenow already mitigates the performance penalty when possible.

1. Using a date range in your query will limit the number of partitions pulled into the union. For a short while, this seemed to only work with a BETWEEN query. After or before was still a union all partitions.

2. Indexes.

This way does something interesting in that it just gets the newest results from the first partition it finds matching records on. Instead of assembling a union to query across, it searches from newest to oldest across partitions which turns it into something that streams through results to find matches. 

The obvious limitation is that if part0001 has 50 rows and you want 100, you get 50. It doesn't keep moving onto part0002 to pack more records into the result set. That would probably be possible in java and more efficient than the union on large tables when querying without effective indexing.

I would love it if any of you happen to be a MariaDB expert and can explain the ugly union mechanics on the back end. This same query with GlideRecord doesn't even finish on our systems in any amount of time I'm willing to wait, and if you do a between to limit the parititions queried it still takes a lot longer.

// sys.scripts.do background script
var logGr = new PartClever().findMostRecentMatching('syslog','messageLIKEexception',10) ;
logGr.next();
var startts = Date.now()
gs.info("================ Test result with findMostRecentMatching");
gs.info("ms: {0}, RowCount {1}, First Message {2}", [Date.now()-startts,logGr.getRowCount(),logGr.message.substring(0,150)+"..."]);

/* Output:
*** Script: getOrderedPartitions sysparm_query=table_nameSTARTSWITHsyslog0^offline=false^valid_from<=2022-08-26 14:28:39^ORDERBYvalid_from hasResult=true completed_ms=1.0
Time: 0:00:02.783 id: uhcdev_1[glide.22] primary_hash=-1940221405 (connpid=314895) for: SELECT syslog00030.`sys_id`, syslog00030.`source_application_family`, syslog00030.`level`, syslog00030.`sys_created_on`, syslog00030.`source_package`, syslog00030.`source`, syslog00030.`message`, syslog00030.`sys_class_name`, syslog00030.`sys_created_by` FROM syslog0003 syslog00030  WHERE syslog00030.`message` LIKE '%exception%' limit 0,10 /* uhcdev007, gs:61DBD3E11B255558A9741F861A4BCB11, tx:6e9befe91ba59558a9741f861a4bcb31, hash:-1940221405 */ 
*** Script: findMostRecentMatching(syslog,messageLIKEexception,10 sysparm_query=messageCONTAINSexception^ORDERBYDESCsequence hasResult=true completed_ms=2786.0 partitions_queried=1.0
*** Script: ms: 0.0, RowCount 10, First Message Unable to load certificate : ATF Certificate ...
*/

The version below was extracted from other code not shared but I think I fixed any on object refs.

/**
 * @class
 * @example
 * new PartClever().findMostRecentMatching('syslog','messageLIKEexception',10) ;
 *                               -> {GlideRecordSet} // GlideRecord Object with most syslogs where message contains exception. 
 */
var PartClever = Class.create(),
PartClever.prototype={
   initialize: function(){};  
   /**
     * Provides ordered list of underlying physical partition tables for direct query. Used by findMostRecentMatching to
     * avoid UNION query inneficiency. 
     * @param {string} table_name the name of a rotated table.
     * @returns {Array} a list of tables in the rotation from oldest to newest.
     */
    getOrderedPartitions: function(table_name){
        var r = [ ];
        var encquery = 'table_nameSTARTSWITH'+table_name+'0^offline=false^valid_from<='+new GlideDateTime().getValue();
        var trsgr = new GlideRecord('sys_table_rotation_schedule');
        trsgr.addEncodedQuery(encquery);
        trsgr.orderBy('valid_from');
        trsgr.query();
        var sts = Date.now();
        gs.info("getOrderedPartitions sysparm_query={0} hasResult={1} completed_ms={2}", 
                     [trsgr.getEncodedQuery(),trsgr.hasNext(),Date.now()-sts]);
        while (trsgr.next()) r.push(trsgr.getValue('table_name'));
        return r;
    },
    /**
     * Queries underlying paritions of rotated table_name in order from most to least recent stopping and returning results from
     * the first partition with matching rows.
     * @param {string} table_name 
     * @param {string} encodedQuery 
     * @param {number} limit 
     * @returns {GlideRecord} a GlideRecord object with an active query completed and results ready to page through via .next().
     */
    findMostRecentMatching: function(table_name,encodedQuery,limit){
        var tables = this.getOrderedPartitions(table_name);
        if (tables.length==0) this.log.warn("No partitions found for ",table_name);
        var r,eccgr,partition;
        var query_count=0;
        var sts = Date.now();
        while (typeof r == 'undefined' && tables.length>0){
            query_count++;
            partition = tables.pop();
            eccgr = new GlideRecord(partition);
            eccgr.addEncodedQuery(encodedQuery);
            eccgr.orderByDesc('sequence');
            if (typeof limit == 'number') eccgr.setLimit(limit);
            eccgr.query();
            if (eccgr.hasNext()) r = eccgr ;
        }
        if (typeof r == 'undefined') {
            var m = "Unable to find matching record for "+encodedQuery;
            this.log.error(m);
            if (gs.isInteractive()) gs.addErrorMessage(m);
            r = eccgr;
        } else {
            gs.info(
                "findMostRecentMatching({0},{1},{2} sysparm_query={3} hasResult={4} completed_ms={5} partitions_queried={6}",
                [table_name,encodedQuery,limit,r.getEncodedQuery(),r.hasNext(),Date.now()-sts,query_count]);
        }
        return r ;
    },
    type: PartClever
}
Version history
Last update:
‎08-26-2022 07:16 AM
Updated by: