- Post History
- Subscribe to RSS Feed
- Mark as New
- Mark as Read
- Bookmark
- Subscribe
- Printer Friendly Page
- Report Inappropriate Content
04-17-2024 01:29 PM - edited 04-22-2024 02:31 AM
Introduction
Knowledge Demand Insights has been a capability of the platform since long before the rise of GenAI.
If you're entitled to use it, it can help identify the gaps in your Knowledge Base.
It does this by analyzing both your Knowledge articles and your Case (or Incident) records.
Be sure to check out this great article by @Lener Pacania1 for a detailed walkthrough of how to configure this capability.
At a PoC at a customer I worked with, we wanted to automatically generate Knowledge Articles for the identified gaps, by sending the data to the LLM.
Knowledge Article Templates.
The situation we encountered was that the customer was heavily leveraging Knowledge Article Templates.
This means that besides the kb_knowledge table, many extended tables are used, so the content is spread over different tables and fields instead of living only in the text field where it is classically stored. In addition, it is often stored as HTML, with all the markup overhead, which may be hard to process and may not even be sent to the Machine Learning solution.
The Solution
To work around this problem (as of spring 2024), we added a large string field called u_flat_article_text to the base table kb_knowledge. Along with that, we created a script include, KnowledgeNormalize, in the global scope (code at the end of this article).
The top of this SI contains the definition of the content fields per extended table.
When you want to use this, you must extend the JSON with the table names and content fields.
What this SI does is:
- Cleans all HTML and keeps only the flat text of the article
- Combines all flattened fields per article and stores the result in the u_flat_article_text field
The first time you use this, you should run the normalizeAllPublishedArticles in a background or fix script.
// One-time backfill: run this from a background script or fix script to fill
// u_flat_article_text on all published articles.
var kn = new global.KnowledgeNormalize();
kn.normalizeAllPublishedArticles();
This will iterate over all published articles and fill the u_flat_article_text field.
After that, I would recommend creating a before BR on kb_knowledge with code like below
// Before business rule on kb_knowledge: refresh u_flat_article_text on the
// record being saved. doUpdate is false so normalizeArticle() does not call
// update() itself from inside the business rule.
var kn = new global.KnowledgeNormalize();
kn.normalizeArticle(current, false);
We should now update the Solution definitions to use our new u_flat_article_text field, instead of the text field.
This ensures we always send the complete and clean data to the Machine Learning solution, without the need to configure each template table and field separately.
Note: Be sure to check the results. I know the HTML clean script I use is not perfect, so if you have improvements, share them in the comments! Also, always check if there are official solutions to this at the point you need it.
The Script Include
The TEMPLATE_CONTENT_FIELDS JSON must be extended with the custom tables and fields used in your organization. The u_kb_your_template_table is an example you can remove.
/**
 * KnowledgeNormalize
 *
 * Flattens the HTML content fields of a knowledge article (including the
 * fields of Knowledge Article Template tables) into plain text and stores the
 * combined result in the u_flat_article_text field.
 */
var KnowledgeNormalize = Class.create();

// List all HTML fields on all tables used as an input field on the Solution definition.
// Extend this map with the custom template tables and fields of your organization;
// "u_kb_your_template_table" is only an example entry and can be removed.
KnowledgeNormalize.TEMPLATE_CONTENT_FIELDS = {
    "kb_knowledge": ["text"],
    "kb_template_faq": ["text", "kb_answer", "kb_question"],
    "kb_template_how_to": ["text", "kb_introduction", "kb_instructions"],
    "kb_template_kcs_article": ["text", "kb_issue", "kb_resolution", "kb_cause", "kb_environment"],
    "kb_template_known_error_article": ["text", "kb_description", "kb_cause", "kb_workaround"],
    "u_kb_your_template_table": ["text", "u_your_main_field", "u_your_secondary_field"]
};

// Table whose field list is used when an article's class has no entry in the map above.
KnowledgeNormalize.DEFAULT_TABLE = "kb_knowledge";

// Field that receives the flattened article text.
KnowledgeNormalize.FLAT_TEXT_FIELD = "u_flat_article_text";

// Number of strip/decode passes applied to every field value.
KnowledgeNormalize.CLEANUP_PASSES = 3;

// HTML entities that are decoded, mapped to their plain-text replacement.
KnowledgeNormalize.HTML_ENTITIES = {
    "&nbsp;": " ",
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": "\"",
    "&#34;": "\"",
    "&#39;": "'",
    "&apos;": "'",
    "&rsquo;": "'",
    "&rdquo;": "\"",
    "&ldquo;": "\""
};

// Built from the keys above so the pattern and the lookup table cannot drift apart.
// The keys contain no regular-expression metacharacters, so no escaping is needed.
KnowledgeNormalize.HTML_ENTITY_PATTERN = new RegExp(Object.keys(KnowledgeNormalize.HTML_ENTITIES).join("|"), "g");

KnowledgeNormalize.prototype = {
    initialize: function () {
    },

    /**
     * Flattens all configured content fields of one article and writes the
     * result to u_flat_article_text on the record that was passed in.
     *
     * @param {GlideRecord} grKbArticle - the article; may be a record on the
     *        base table even though the article belongs to an extended table.
     * @param {boolean} doUpdate - true to save the record (with workflow
     *        disabled); false when the caller saves it, e.g. a before BR.
     */
    normalizeArticle: function (grKbArticle, doUpdate) {
        var fieldsByTable = KnowledgeNormalize.TEMPLATE_CONTENT_FIELDS;
        var className = grKbArticle.getValue("sys_class_name");

        // Tables without an explicit configuration fall back to the base table's fields.
        var contentFields = Object.prototype.hasOwnProperty.call(fieldsByTable, className) ?
            fieldsByTable[className] :
            fieldsByTable[KnowledgeNormalize.DEFAULT_TABLE];

        // If the record was loaded from another table than its actual class,
        // read the template fields from the actual table. The result is still
        // written to the record that was passed in, so a before BR sees it.
        var grSource = grKbArticle;
        if (className && grKbArticle.getTableName() !== className) {
            grSource = new GlideRecordUtil().getGR(className, grKbArticle.getUniqueValue()) || grKbArticle;
        }

        var self = this;
        var flatContent = "";
        contentFields.forEach(function (field) {
            var flatText = self._getFlatTextFromHTML(grSource.getValue(field));
            if (flatText) { // skip empty fields instead of adding blank lines
                flatContent += flatText + "\n";
            }
        });

        grKbArticle.setValue(KnowledgeNormalize.FLAT_TEXT_FIELD, flatContent); // update via before BR or script
        if (doUpdate) { // not when called from the BR
            grKbArticle.setWorkflow(false);
            grKbArticle.update();
        }
    },

    /**
     * Normalizes every published article, optionally narrowed by an encoded query.
     *
     * @param {string} [encodedQuery] - extra encoded query added to the
     *        workflow_state=published filter.
     */
    normalizeAllPublishedArticles: function (encodedQuery) {
        var grKbArticle = new GlideRecord("kb_knowledge");
        grKbArticle.addQuery("workflow_state", "published");
        if (encodedQuery) {
            grKbArticle.addEncodedQuery(encodedQuery);
        }
        grKbArticle.query();
        while (grKbArticle.next()) {
            this.normalizeArticle(grKbArticle, true);
        }
    },

    /**
     * Converts an HTML fragment to single-line plain text.
     *
     * The cleanup runs several passes because decoding entities can reveal
     * escaped markup (e.g. "&lt;b&gt;") that only a following pass can strip.
     * A pass can also wipe out everything, so the last non-empty result wins.
     *
     * @param {string} input - raw field value, may be empty or null.
     * @returns {string} the flattened text, "" when there is no content.
     */
    _getFlatTextFromHTML: function (input) {
        var lastNonEmpty = "";
        var text = input ? String(input) : "";
        for (var pass = 0; pass < KnowledgeNormalize.CLEANUP_PASSES; pass++) {
            text = this._cleanupPass(text);
            if (!text) {
                break;
            }
            lastNonEmpty = text;
        }
        return lastNonEmpty;
    },

    /**
     * One cleanup pass: strip tags, decode the known entities, collapse whitespace.
     *
     * @param {string} text - text to clean.
     * @returns {string} the cleaned text.
     */
    _cleanupPass: function (text) {
        var entities = KnowledgeNormalize.HTML_ENTITIES;
        return text
            // Strip HTML tags; replace them by a space so adjacent words stay separated
            .replace(/<\/?[^>]+(>|$)/g, " ")
            // Decode common HTML entities
            .replace(KnowledgeNormalize.HTML_ENTITY_PATTERN, function (match) {
                return entities[match];
            })
            // Collapse every run of whitespace (including newlines) to a single space
            .replace(/\s+/g, " ")
            .trim();
    },

    type: 'KnowledgeNormalize'
};
- 2,619 Views
- Mark as Read
- Mark as New
- Bookmark
- Permalink
- Report Inappropriate Content
How do you approach this if your Article Template fields all have different security rules?
That's the entire reason we used Article Templates, so that some external users could see a certain field, different external users could see another, and then all internal users could see them all.
I would be hesitant to do this, as I wouldn't want the AI results to show a snippet from a different article field that the current user wouldn't normally have access to.