Import of Word Documents to ServiceNow Knowledge base

welterp · ‎02-10-2014

I am trying to import Word documents into ServiceNow Knowledge articles without losing any images. I have tried using the Word Cleaner and SED installs without any success. Is there any fix that works?

conmic · ‎09-09-2014

IMPORTANT UPDATE - 03/03/2016:

I updated the entire script as I ran into some errors. And now it can also be used with journal fields!

See my reply: Re: Import of Word Documents to ServiceNow Knowledge base

You can also read more about it here: Create Knowledge Base Article (HTML field) or Comments (Journal field) from Email

you corrected the thrown exception errors on 4 of the lines in the script.

example on your line 18 with the part:

while(match == regex.exec(searchBody)){

the actual script is

while(match = regex.exec(searchBody)){

I know it throws an error, but the assignment is somehow required for the script to work, because later on you use the value that was stored in the variable 'match' on line 18. If anybody has an idea how to write this correctly, you're welcome to help out.

Here is the full script that I use:

// Date two days from now. It is only consumed by the valid_to assignment
// below, which is currently switched off.
var gdt = new GlideDate();
gdt.addDays(2);
//current.valid_to = gdt;

// Article title: the e-mail subject with the first "Knowledge:" marker removed
// and the surrounding whitespace trimmed.
var desc = email.subject.replace("Knowledge:","");
current.short_description = desc.trim();

// Fixed values given to every article created by this script.
current.topic = "General";
current.roles = "knowledge";
current.workflow_state = "draft";
//current.category = "";



//Find every <img> tag in the e-mail body, add a CSS float that mirrors its
//align attribute, and hand its src value to findAndReplaceImage(), which
//rewrites searchBody so that the image points at the stored attachment.
//
//The tags are located in scanBody, a frozen copy of the body, while all edits
//are applied to searchBody. A /g regex used with exec() resumes at lastIndex,
//so scanning a string whose length changes between calls can skip the tag
//that directly follows an edited one.

//Upper bound on the number of <img> tags processed per e-mail (limit kept
//from the original script so it will stop no matter what).
var MAX_IMAGES = 100;

var currentCount = 0;
var searchBody = email.body_html.replace(/\n/g, " ");
var scanBody = searchBody;

//[^>]* rather than .*? so that a tag is still matched when it contains a
//line-break character that "." does not match (for example \r).
var regex = /<img\b[^>]*>/ig;
var match;
var match2;

while ((match = regex.exec(scanBody)) !== null) {
   if (currentCount >= MAX_IMAGES) {
      break;
   }
   currentCount += 1;

   var imgTag = match[0];

   //Add a style float tag next to the align tag.
   var alignText = imgTag
      .replace(/align=['"]?left['"]?/gi, 'align="left" style="FLOAT: left"')
      .replace(/align=['"]?right['"]?/gi, 'align="right" style="FLOAT: right"');

   if (alignText !== imgTag) {
      //A function is used as the replacement so that "$" sequences inside
      //the tag are inserted literally instead of being interpreted.
      searchBody = searchBody.replace(imgTag, function () {
         return alignText;
      });
   }

   //The src value may be double-quoted, single-quoted or bare. The capture
   //group is mandatory, so match2[1] is always a string.
   var srcRegex = /src\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/ig;
   while ((match2 = srcRegex.exec(alignText)) !== null) {
      //Strip only the surrounding quotes; the value itself is left intact
      //so that it can still be found in searchBody.
      var src = match2[1].replace(/^["']|["']$/g, "");

      //An empty src would make findAndReplaceImage() search for the empty
      //string, which matches at the very start of the body.
      if (src !== "") {
         findAndReplaceImage(src);
      }
   }
}

//Strip the Microsoft Word specific markup from the body.
//
//Each pattern is applied with a single global replace(). Looping with exec()
//on a /g regex while shortening the string being scanned leaves lastIndex
//past the start of the next match, so a block that directly follows a
//removed one would be left in the article. A global replace() also has no
//need for an iteration cap.
//
//[\s\S]*? is used instead of .*? so that a block is still matched when it
//contains a line-break character that "." does not match (for example \r).

//Office paragraph markers: <o:p> ... </o:p>
searchBody = searchBody.replace(/<o:p>[\s\S]*?<\/o:p>/ig, "");

//Conditional comments: <!--[if ...]> ... <![endif]--> are removed together
//with their content.
searchBody = searchBody.replace(/<!--\[if[\s\S]*?<!\[endif\]-->/ig, "");

//Conditional blocks whose content must stay: <![if !vml]> ... <![endif]> and
//<![if !supportLists]> ... <![endif]> (the bullet markers of Word lists).
//Only the two markers are dropped. "$1" inserts the captured text literally,
//so "$" sequences that occur in the content are not interpreted.
searchBody = searchBody.replace(/<!\[if !(?:vml|supportLists)\]>([\s\S]*?)<!\[endif\]>/ig, "$1");



//Debug aid: log the cleaned-up body before it is stored.
//gs.log("Create Morning Post: " + searchBody);



//Use the cleaned-up HTML as the text of the record being created.
current.text = searchBody;



//Insert the record with the field values assigned so far.
current.insert();



//NOTE(review): presumably tells the platform not to run any further inbound
//actions for this e-mail - confirm against the platform documentation.
event.state="stop_processing";



//Returns the sys_id of the first sys_email record with the given uid whose
//sys_created_on lies less than 300 seconds away from the current time, or ""
//when no such record exists.
//
//emailuid: value matched against the uid field of sys_email.
function getEmailSYSID(emailuid) {
   var mail = new GlideRecord('sys_email');
   mail.addQuery('uid', emailuid);
   mail.query();

   for (var found = mail.next(); found; found = mail.next()) {
      //Difference between the record's creation time and now (in seconds).
      var ageSeconds = gs.dateDiff(mail.sys_created_on, gs.nowNoTZ(), true);

      //Accept the record only inside the window, in either direction.
      var withinWindow = ageSeconds > -300 && ageSeconds < 300;
      if (withinWindow) {
         return mail.sys_id;
      }
   }

   return "";
}




//Points one inline image of the e-mail at its stored attachment by rewriting
//the global searchBody.
//
//imageText: the src value of an <img> tag, without quotes. Only "cid:"
//references are handled; the attachment file name is the text between
//"cid:" and the first "@" (or the end of the value when there is no "@").
//
//searchBody is left unchanged when imageText is empty or is not a "cid:"
//reference (for example an external http URL), when the e-mail record cannot
//be found, or when no attachment with that file name is stored for the
//e-mail. Nothing is returned.
function findAndReplaceImage(imageText){
   var img = imageText;

   //Anything that is not a cid: reference is not an attachment of this
   //e-mail and must keep its original source.
   if (!img || !/^cid:/i.test(img)) {
      return;
   }

   //substring(4, -1) would swap its arguments and yield "cid:", so the
   //missing "@" case is handled explicitly.
   var atPos = img.indexOf("@");
   var imgName = atPos === -1 ? img.substring(4) : img.substring(4, atPos);
   if (imgName === "") {
      return;
   }

   //Without the e-mail record the attachment query could only match by
   //file name against an empty table_sys_id.
   var emailSysId = getEmailSYSID(email.uid);
   if (!emailSysId) {
      return;
   }

   //Get the sys_id of the attachment
   var gr = new GlideRecord("sys_attachment");
   gr.addQuery("file_name", imgName);
   gr.addQuery("table_sys_id", emailSysId);
   gr.query();

   if (!gr.next()) {
      //Keep the original reference rather than writing a link without a
      //sys_id.
      return;
   }

   var imgCode = "sys_attachment.do?sys_id=" + gr.sys_id;

   //Replace every occurrence: the same reference may appear in the body
   //before the <img> tag, and a first-occurrence replace would then miss
   //the tag itself. split/join treats both strings literally.
   searchBody = searchBody.split(img).join(imgCode);
}

View solution in original post

NicholasH745021 · ‎04-28-2015

We are on Eureka and this script works great!! Thank you for the corrections! Joseph Nicolosi

jfinn · ‎05-28-2015

Tested on Eureka and the code works ok with one or two minor issues.

1.) Bullet points are imported as follows. NOTE: I got one email to import correctly but it had been edited and I am trying to replicate the conditions. Otherwise, all other emails have been imported as below.

<![if !supportLists]> · <![endif]>Test line 1

2.) Text boxes do not import correctly. Sometimes used for adding text to an image but either don't get imported (if placed on top of the image) or get imported but creates a table and places the image into one of the cells.

3.) I am also wondering if there is any way of getting the images to import into the library rather than as attachments. We have some KB articles which have attachments along with multiple images. When all the images in the KB article are added as attachments, this means that the other attachments do not stand out as much.

All in all, this code will come in very useful as is but just wondering if anybody has encountered the same issues as above and possibly found a solution / workaround.

Alex125 · ‎05-28-2015

We got the same issue with the bullet points and so far no fix. So looking for that..

laurentc · ‎06-30-2015

For the bullet points issue, we added this code:

searchBody = searchBody.replace(/<!\[if !supportLists\]>/ig, "");


searchBody = searchBody.replace(/<!\[endif\]>/ig, "");

We placed it at line 56 of the latest code proposed by Joseph Nicolosi.

Works in Eureka

Alex125 · ‎06-30-2015

Thanks Laurent!