The Zurich release has arrived! Interested in new features and functionalities? Click here for more

Importing Confluence pages into knowledge articles in ServiceNow

Rajkumar99
Tera Contributor

This article is a continuation of the previous discussions on integrating Confluence with ServiceNow for knowledge migration. If you haven’t seen them yet, I recommend checking out these two articles first:

In this follow-up, we will extend the solution further by focusing on:

  • Handling recursive child pages under a Confluence space

  • Formatting tables and text for ServiceNow KB compatibility

  • Downloading and remapping attachments

  • Handling the cursor logic and E2E Implementation

All the rest api end points can be found in the Confluence prod docs.

NameEndpoint
Attachment_listhttps://tenant_name.atlassian.net/wiki/rest/api/content/${confluenceID}/child/attachment?limit=250&s...
GET_Bodyhttps://tenant_name.atlassian.net/wiki/rest/api/content/${confluenceID}?expand=body.export_view
Attachment_downloadhttps://tenant_name.atlassian.net/wiki/rest/api/content/${confluenceID}/child/attachment/${attachmen...
GET_rootPageshttps://tenant_name.atlassian.net/wiki/api/v2/spaces/${spaceId}/pages?depth=root&limit=250&status=cu...
GET_Labelhttps://tenant_name.atlassian.net/wiki/rest/api/content/${confluenceID}/label
GET_Authorhttps://tenant_name.atlassian.net/wiki/rest/api/content/${confluenceID}
GET_Children_cushttps://tenant_name.atlassian.net/wiki/api/v2/pages/${spaceID}/direct-children?limit=250
GET_dynamic 
GET_allPagesinSpacehttps://tenant_name.atlassian.net/wiki/api/v2/spaces/${spaceID}/pages?limit=250&status=current

 

In Confluence, a space functions much like a knowledge base in ServiceNow.
For migration scenarios where you want to bring over an entire space, you can use the Get all pages from space API. To do this, you’ll first need the space ID. You can obtain it by running the following URL in a browser where you’re already logged into Confluence:

https://<your_tenant>.atlassian.net/wiki/rest/api/space/{spaceKey}

Once you have the space ID, you can have a schedule job in ServiceNow to pull and import all pages automatically.
Schedule JOB:

(function() {
var spaceID =; // Space ID (try using the sys property with JSON so you can parse and use them here)
var allPageIds = []; // will contain all page IDs
var nextUrl = null;
var fallbackUserId = fallback admin sys_id - use the sys property to store it

// Collect all page IDs from the space
do {
var pageListRequest;
if (nextUrl) {
pageListRequest = new sn_ws.RESTMessageV2('Confluence', 'GET_dynamic');
pageListRequest.setEndpoint(nextUrl);
} else {
pageListRequest = new sn_ws.RESTMessageV2('Confluence', 'GET_allPagesinSpace');
pageListRequest.setStringParameter('spaceID', spaceID);
}

var pageListResponse = pageListRequest.execute();
var pageListJSON = JSON.parse(pageListResponse.getBody());
var pages = pageListJSON.results;

for (var i = 0; i < pages.length; i++) {
allPageIds.push(pages[i].id);
}

var nextCursorLink = pageListJSON._links.next;
if (gs.nil(nextCursorLink)) {
break;
}
// Get the cursor value - the next url used for pagination since we receive only 250 for single call.
var cursorParts = nextCursorLink.split("cursor=");
var cursorValue = cursorParts[cursorParts.length - 1]; // while executing I found every time cursor values keeps appending so we need to get the last value.
if (cursorValue == "") {
break;  
}
nextUrl = "https://tenant_name.atlassian.net/wiki/api/v2/spaces/" + spaceID + "/pages?cursor=" + cursorValue + "&limit=250";
} while (nextUrl);
gs.info("Values of pages: " + JSON.stringify(allPageIds));

//Normalize and wrapping tables here - since we have tables in confluence we can have the same in SN
function bulkReplace(text, replacements) {
for (var key in replacements) {
var regex = new RegExp(key, "g");
text = text.replace(regex, replacements[key]);
}
return text;
}

function normalizeTables(text) {
text = text.replace(/<div class="table-wrap">/g, '').replace(/<\/div>/g, '');

text = text.replace(/<table[^>]*>/g,
'<div style="overflow-x:auto; max-width:100%;">' +
'<table style="border-collapse: collapse; border: 1px solid #000; width: 100%;">'
).replace(/<\/table>/g, '</table></div>');

text = text.replace(/<th[^>]*>/g,
'<th style="border: 1px solid #000; background-color: #e6e6e6; padding: 5px; text-align: center;">'
);

text = text.replace(/<td[^>]*>/g,
'<td style="border: 1px solid #000; padding: 5px;">'
);

return text;
}
// Attachment section
function downloadAndMapAttachments(confluenceID, sysid, attachments) {
var map = {};
for (var c = 0; c < attachments.length; c++) {
try {
var attachmentRequest = new sn_ws.RESTMessageV2('Confluence', 'Attachment_download');
attachmentRequest.setStringParameter('confluenceID', confluenceID);
attachmentRequest.setStringParameter('attachmentID', attachments[c].id);

var cleanName = attachments[c].title.replace(/\s+/g, "");
attachmentRequest.saveResponseBodyAsAttachment('kb_knowledge', sysid, cleanName);
attachmentRequest.execute();

var attGR = new GlideRecord('sys_attachment');
attGR.addQuery('table_sys_id', sysid);
attGR.addQuery('file_name', cleanName);
attGR.orderByDesc('sys_created_on');
attGR.setLimit(1);
attGR.query();
if (attGR.next()) {
map[attachments[c].title] = attGR.sys_id.toString();
}

} catch (ex) {
gs.error("Attachment error for file " + attachments[c].title + ": " + ex.message);
}
}
return map;
}

// Pull page one by one - process starts here
for (var idx = 0; idx < allPageIds.length; idx++) {
var confluenceID = allPageIds[idx];

// Fetching Page Body
var bodyRequest = new sn_ws.RESTMessageV2('Confluence', 'GET_Body');
bodyRequest.setStringParameter('confluenceID', confluenceID);
var bodyResponse = JSON.parse(bodyRequest.execute().getBody());

// Fetching Author
var authorRequest = new sn_ws.RESTMessageV2('Confluence', 'GET_Author');
authorRequest.setStringParameter('confluenceID', confluenceID);
var authorResponse = JSON.parse(authorRequest.execute().getBody());

// Fexthing Labels
var labelRequest = new sn_ws.RESTMessageV2('Confluence', 'GET_Label');
labelRequest.setStringParameter('confluenceID', confluenceID);
var labelResponse = JSON.parse(labelRequest.execute().getBody());

var meta = (labelResponse.results && labelResponse.results.length > 0) ?
labelResponse.results.map(function(x) {
return x.name;
}).join(",") :
bodyResponse.title;

// Creating KB article here
var kbGR = new GlideRecord('kb_knowledge');
kbGR.initialize();
kbGR.short_description = bodyResponse.title;
kbGR.kb_knowledge_base = '057b4ed33b87e610cb531da985e45ab9'; // Update KB base sys_id here by using  sys_property
kbGR.meta = meta;

// Format body text
var replacements = {
"<p": '<p style="font-family: helvetica; font-size: 10pt;" ',
"<h1": '<h1 style="font-family: helvetica; font-size: 17pt;" ',
"<h2": '<h2 style="font-family: helvetica; font-size: 12pt;" ',
"<h3": '<h3 style="font-family: helvetica; font-size: 9pt;" ',
"<h4": '<h4 style="font-family: helvetica; font-size: 8pt;" ',
"<h5": '<h5 style="font-family: helvetica; font-size: 7pt;" ',
"<h6": '<h6 style="font-family: helvetica; font-size: 5pt;" ',
"<pre": '<pre style="font-family: monospace; font-size: 10pt;" ',
"<td": '<td style="font-family: helvetica; font-size: 10pt;" ',
"<th": '<th style="font-family: helvetica; font-size: 14pt; background-color: #e6e6e6; text-align: center;" ',
"%20": '' //this was added since encoded file name in the attachments -Raj
};


var bodyText = bulkReplace(bodyResponse.body.export_view.value, replacements);
bodyText = normalizeTables(bodyText);

// Append Confluence ref link at the end so it will easy to navigate, ignore if not required
var confluenceURL = bodyResponse._links.base + bodyResponse._links.webui;
var referenceNote = '<p style="margin-top: 30px; font-style: italic; font-size: 9pt;">Confluence article reference: <a href="' + confluenceURL + '" target="_blank">' + confluenceURL + '</a></p>';
bodyText += referenceNote;

kbGR.text = bodyText;

// Assign Author
var authorEmail = authorResponse.history.createdBy.email;
var authorName = authorResponse.history.createdBy.displayName || authorResponse.history.createdBy.publicName;

if (authorEmail) {
var emailGr = new GlideRecord('sys_user');
emailGr.addQuery('email', authorEmail);
emailGr.query();
kbGR.author = emailGr.next() ? emailGr.sys_id.toString() : fallbackUserId;
} else if (authorName) {
var userGr = new GlideRecord('sys_user');
userGr.addQuery('name', authorName);
userGr.query();
kbGR.author = userGr.next() ? userGr.sys_id.toString() : fallbackUserId;
} else {
kbGR.author = fallbackUserId;
}

var kbSysId = kbGR.insert();

// Attachments
var attachmentListRequest = new sn_ws.RESTMessageV2('Confluence', 'Attachment_list');
attachmentListRequest.setStringParameter('confluenceID', confluenceID);
var attachmentListResponse = JSON.parse(attachmentListRequest.execute().getBody());
var attachmentMap = downloadAndMapAttachments(confluenceID, kbSysId, attachmentListResponse.results || []);

// Replace attachment links in KB body
var kbUpdate = new GlideRecord('kb_knowledge');
if (kbUpdate.get(kbSysId)) {
var updatedText = kbUpdate.text;
for (var fileName in attachmentMap) {
var encodedFileName = encodeURIComponent(fileName);
// Removing %20
encodedFileName = encodedFileName.replace(/%20/g, '');
var oldLink = 'src="https://tenant_name.atlassian.net/wiki/download/attachments/' + confluenceID + '/' + encodedFileName + '?api&#61;v2"'; // In few pages we observe this link
var oldLink2 = 'src="/wiki/download/attachments/' + confluenceID + '/' + encodedFileName + '?api&#61;v2"'; // In few pages we observe this link
var newLink = 'src="/sys_attachment.do?sys_id=' + attachmentMap[fileName] + '"/';
updatedText = updatedText.replaceAll(oldLink, newLink);
updatedText = updatedText.replaceAll(oldLink2, newLink);
}
kbUpdate.text = updatedText;
kbUpdate.update();
}
//gs.info("KB: " + bodyResponse.title + " → KB sys_id: " + kbSysId);
}
})();.
Tip:If you need to count the number of pages in a Confluence space, one workaround is:
Space → Space Settings → Content → Reorder.
From there, collapse all pages, copy the list, paste it into Excel, and use the row count. It’s a bit messy, but since Confluence doesn’t give an easy out-of-the-box option, this trick works better than trying to run a direct DB query..
While exploring the Confluence REST API documentation, you’ll also notice that page status values can be filtered using query parameters. Common status values include: currentarchiveddeletedtrashed.

PS: This is my first article, so any feedback, suggestions, or corrections are most welcome.
0 REPLIES 0