A collection of examples demonstrating how to use a variety of Lucidworks Fusion Index Pipeline stages.
- JavaScript Stage Log Entry
- Process a Field
- Process a Standard Facet
- Process a Hierarchical Facet
- Reference a File of JavaScript Utility Functions
- Clean Up Special Characters in Document ID
- Managed JavaScript Stage
- Managed JavaScript Stage Log Entry
- Upload Code to Blob Store Automatically
- Example Managed JavaScript File
The JavaScript Index Stage accepts custom JavaScript processing logic for manipulating pipeline document (doc) values. One way to prove this stage is activated during an indexing job is to have it write a simple line of text to the Fusion 3.x connectors.log
or the Fusion 4.2.x api.log
file.
JavaScript Stage Log Entry
function (doc) {
logger.info("ENTERING JAVASCRIPT STAGE");
}
Process a Field
This script shows the syntax for ensuring a value transformation is returned to the Index Pipeline from the JavaScript Index Stage. It makes sure there is a value first, then adds the value to a field in the Solr schema and returns the document to the pipeline.
function (doc) {
logger.info("ENTERING JAVASCRIPT STAGE");
if (doc) {
logger.info("ENTERING JAVASCRIPT STAGE - FIELDS");
// Primary Practice Area
if (doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s")) {
var ppa = doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s");
doc.addField("primary_practice_area", ppa);
}
return doc;
}
}
Process a Standard Facet
When using JavaScript to process a lot of fields and facets, make them easier to manage by separating them into different JavaScript stages. In this script, a multiple-choice SharePoint field is transformed into an array value for a multi-valued string field in the Solr schema that is used by a search facet.
function (doc) {
"use strict";
if (doc) {
logger.info("ENTERING JAVASCRIPT STAGE - FACETS");
// Topic Menu
var arrtop = [];
if (doc.getFirstFieldValue("ows_topic_menu_s")) {
arrtop = doc.getFirstFieldValue("ows_topic_menu_s");
// this is a SharePoint multi-choice field that needs a bit of cleaning
arrtop = arrtop.replace(/^;#/, '');
arrtop = arrtop.replace(/;#$/, '');
arrtop = arrtop.replace(/;#/g, ';');
arrtop = arrtop.split(";");
for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
doc.addField("topic_menu", arrtop[i]);
}
}
return doc;
}
}
Process a Hierarchical Facet
App Studio's hierarchical facets require a child and parent field. This script returns both the child and parent values to the index from a single semicolon-delimited field in a SharePoint list.
function (doc) {
"use strict";
// Topics Taxonomy
var tax = '';
var itm = [];
var heirs = [];
var heirparents = [];
// test, get field, clean
if (doc.getFirstFieldValue("ows_topics_taxonomy_s")) {
tax = doc.getFirstFieldValue("ows_topics_taxonomy_s");
// split taxonomies string into array of items
itm = tax.split(';');
// process each taxonomy (even if only one)
for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {
// split each taxonomy string into array of levels
var levels = itm[itm_i].split("/");
for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
var level_cnt = 0;
var hier_val = "";
// prepend the correct index and parents for each level.
while (level_cnt < levels_len) {
hier_val = hier_val + "/" + levels[level_cnt];
var path = level_cnt.toString() + hier_val;
// de-dupe hiers by checking for existence of "path"
if (heirs.indexOf(path) < 0) {
heirs.push(path);
}
// de-dupe parents by checking for existence of "path"
if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
heirparents.push(path);
}
level_cnt += 1;
}
}
}
// add arrays to field
doc.addFields("topics_taxonomy_hier", heirs);
doc.addFields("topics_taxonomy_hier_parent", heirparents);
}
}
Reference a File of JavaScript Utility Functions
One of the best ways to simplify the code in a JavaScript Index Stage is to place repeating code in a function stored as a file on the Fusion Server and then refer to this function from the code in the stage. To do this, create a file that contains a JavaScript function (e.g. javascript_utils.js
), and upload this file to the $FUSION_HOME/scripts folder (e.g. /opt/fusion/3.0.1/scripts/javascript_utils.js
). The following example contains a single function that cleans up all SharePoint Lookup, Choice and Hyperlink fields:
/**
 * Clean up SharePoint field delimiters in the first value of a field.
 *
 * Handles, in this order:
 *   - lookup fields:    "12;#Alpha;#34;#Beta" -> "Alpha;Beta"
 *   - choice fields:    ";#One;#Two;#"        -> "One;Two"
 *   - hyperlink fields: "http://host/page, Title" -> "http://host/page"
 *     (everything from the first comma onwards is discarded)
 *
 * @param {string} field - name of the document field to read
 * @param {Object} doc - pipeline document; only getFirstFieldValue(field) is called
 * @returns {string} the cleaned value, or '' when the field has no value
 */
function cleanSPDelimiters(field, doc) {
    var raw = doc.getFirstFieldValue(field);
    // A missing field yields null/undefined; return an empty string instead
    // of failing on the .replace() calls below.
    if (raw === null || raw === undefined) {
        return '';
    }
    // Coerce first so non-string values (e.g. numbers) can be cleaned too.
    var x = String(raw);
    // --- lookup field ----------------------------------
    x = x.replace(/^[0-9]+;#/, ''); // remove at start
    x = x.replace(/;#[0-9]+$/, ''); // remove at end
    x = x.replace(/;#[0-9]+;#/g, ';'); // replace middle
    // --- choice field ----------------------------------
    x = x.replace(/^;#/, ''); // remove at start
    x = x.replace(/;#$/, ''); // remove at end
    x = x.replace(/;#/g, ';'); // replace middle
    // --- hyperlink field -------------------------------
    // split string on comma and return left side
    return x.split(',')[0];
}
After placing the file on the server, refer to it from the JavaScript Index Stage with a load
declaration, and then call the function(s) directly. Here's how a single function simplifies the cleanup process of the Facets example above:
function (doc) {
"use strict";
// load PSJH utilities;
load(java.lang.System.getProperty('apollo.home') + '/scripts/javascript_utils.js');
if (doc) {
logger.info("ENTERING JAVASCRIPT STAGE - FACETS");
// Topic Menu (Galleries)
var arrtop = [];
if (doc.getFirstFieldValue("ows_topic_menu_s")) {
arrtop = cleanSPDelimiters("ows_topic_menu_s", doc).split(";");
for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
doc.addField("topic_menu", arrtop[i]);
}
}
return doc;
}
}
Clean Up Special Characters in Document ID
In Fusion 3.0.1, the Solr Partial Update Indexer Stage adds two backslashes to a document id that contains special characters. The resulting id has a residual backslash, which prevents it from matching up with the id already in the index. This workaround uses JavaScript Index Stage to escape the special characters prior to the Solr Partial Update Indexer Stage.
function(doc) {
if (doc.getId() !== null) {
// get the ID
var new_id = doc.getId();
// escape dashes
new_id = new_id.replace(/-/g,"\-");
// change the id field
doc.setId(new_id);
}
return doc;
}
;
Related
https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html#TheStandardQueryParser-EscapingSpecialCharacters
Managed JavaScript in Index/Query Pipelines
The Managed JavaScript Stage arrived on the scene with Fusion 4.1.1. This release leveraged the Fusion Server blob store and made it easy to manage ALL JavaScript functions from outside of the Fusion Server.
Managed JavaScript Log Entry
A simple way to prove how this stage works is to create and upload a simple JavaScript file (e.g. managed.js) to the Fusion blob store. Include a logger.info
statement with a newline escape sequence, which makes this stage easier to find in the log files when troubleshooting.
function (doc) {
logger.info("\n>>>>> Managed JavaScript Index Stage");
}
Then add a Managed JavaScript Stage to the Index Pipeline and refer to this file in the Script Reference field with the phrase urn:x-fusion:blob:managed.js
. Note this is as simple as adding the file name to the end of a "urn:x-fusion:blob:" string. Finally, run a connector job using the Index Pipeline and check for the resulting log message in the Fusion Server's api.log
file.
Upload Code to Blob Store Automatically
Uploading JavaScript files from a desktop device to the blob store can be done automatically with a simple keyboard shortcut in a Visual Studio Code editor. To try it out, download and install Visual Studio Code, then find and enable the REST Client for Visual Studio Code extension. Be sure to review the REST Client extension Usage section before proceeding for best results.
When ready, add the following variables and PUT statement as a commented-out section at the top of a JavaScript file (e.g. managed.js). It should look something like the following:
// Send Request
/*
@fusion_host = 10.11.12.13
@fusion_port = 8764
@basic_auth = Basic admin password
@app_name = myApp
@file_name = managed.js
@res_type = js-index
PUT http://{{fusion_host}}:{{fusion_port}}/api/apps/{{app_name}}/blobs/{{file_name}}
?resourceType=file:{{res_type}}
Content-Type: text/javascript
Authorization: {{basic_auth}}
< ./{{file_name}}
*/
function (doc) {
logger.info("\n>>>>> Managed JavaScript Index Stage - test");
}
Adjust the fusion_host
, basic_auth
and app_name
variables for the destination Fusion Search system, of course, and be sure to leave an empty line above and below the PUT statement.
To send this file to the blob store, highlight the content between the comment start/end lines, press the F1 key to bring up the command line input box, type the word "Rest" in the search box and select the "Rest Client: Send Request" option from the suggestions list. On successive run attempts, you may be able to right-click on those selected lines and choose "Send Request" from the context menu to send the request. If successful, a Response panel should appear on the right that begins with an HTTP/1.1 200 OK
.
When updating JS code in an existing blob file that is used by an existing Index Pipeline, be sure to refresh/rebuild the pipeline before running a data connector job. This can be done by modifying and saving the pipeline from the UI or by sending a cURL command to the API like curl -u admin:password -X PUT http://localhost:8764/api/index-pipelines/index_document_pipeline/refresh
.
Example Managed JavaScript File
The code below is an example of a JavaScript file that contains all the field, facet and utility functions needed for transforming SharePoint List/Library fields in a particular setting. It is referred to as urn:x-fusion:blob:psjhUtils.js
from a single Managed JavaScript Stage in an Index Pipeline.
/*
@fusion_host = 10.11.12.13
@fusion_port = 8764
@basic_auth = Basic admin password
@app_name = myApp
@file_name = psjhUtils.js
@res_type = js-index
PUT http://{{fusion_host}}:{{fusion_port}}/api/apps/{{app_name}}/blobs/{{file_name}}
?resourceType=file:{{res_type}}
Content-Type: text/javascript
Authorization: {{basic_auth}}
< ./{{file_name}}
*/
function (doc) {
logger.info("\n>>>>> Managed JavaScript Index Stage");
if (doc) {
// PSJH Measures
var arrpsjh = [];
if (doc.getFirstFieldValue("ows_filterbypsjhmeasures_s")) {
arrpsjh = cleanSPDelimiters("ows_filterbypsjhmeasures_s", doc).split(";");
for (var i = 0, leni = arrpsjh.length; i < leni; i += 1) {
doc.addField("psjh_measure", arrpsjh[i]);
}
}
// Quality Strategy Domain
var arrgsd = [];
if (doc.getFirstFieldValue("ows_filterbyqualitystrategydomain_s")) {
arrgsd = cleanSPDelimiters("ows_filterbyqualitystrategydomain_s", doc).split(";");
for (var i = 0, leni = arrgsd.length; i < leni; i += 1) {
doc.addField("quality_strategy_domain", arrgsd[i]);
}
}
// Primary Practice Area
var arrppa = [];
if (doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s")) {
arrppa = cleanSPDelimiters("ows_filterbyprimarypracticearea_s", doc).split(";");
for (var i = 0, leni = arrppa.length; i < leni; i += 1) {
doc.addField("primary_practice_area", arrppa[i]);
}
}
// Strategic Programs
var arrstp = [];
if (doc.getFirstFieldValue("ows_filterbystrategicprogram_s")) {
arrstp = cleanSPDelimiters("ows_filterbystrategicprogram_s", doc).split(";");
for (var i = 0, leni = arrstp.length; i < leni; i += 1) {
doc.addField("strategic_program", arrstp[i]);
}
}
// Resource_Type (Reference)
var res = [];
if (doc.getFirstFieldValue("ows_resourcetype_s")) {
res = cleanSPDelimiters("ows_resourcetype_s", doc).split(";");
for (var i = 0, leni = res.length; i < leni; i += 1) {
doc.addField("resource_type", res[i]);
}
}
// Relationships
// Note: the "s" captures fields that are purely numbers ONLY where there are multiple
// values and only if the first value is is a true string. The "l" captures fields
// that are purely numbers (or multiple fields where the first field is all numbers).
var rel = "";
// reports
if (doc.getFirstFieldValue("ows_relatedproductidtext_s")) {
rel = doc.getFirstFieldValue("ows_relatedproductidtext_s");
rel = rel.replace(/#/g, "").replace(/;/g, " OR ");
doc.addField("relationship", rel);
}
if (doc.getFirstFieldValue("ows_relatedproductidtext_l")) {
rel = doc.getFirstFieldValue("ows_relatedproductidtext_l");
rel = rel.replace(/#/g, "").replace(/;/g, " OR ");
doc.addField("relationship", rel);
}
// Path String and Breadcrumb
var pat = '';
var revpat = '';
var arrpat = [];
var h = '';
var p = '';
if (doc.getFirstFieldValue('ows_path_s')) {
pat = doc.getFirstFieldValue('ows_path_s');
// generate pathstring that uses forward slashes
revpat = pat.replace(/\\/g, '/');
// generate pathlink from array
arrpat = pat.split('\\');
for (var i = 0, leni = arrpat.length; i < leni; i += 1) {
// incrementally build new path
if (i === 0) {
// first value only
p = arrpat[i];
} else {
// all other values
p = p + '/' + arrpat[i];
}
// incrementally build new breadcrumb that includes path
// NOTE: this is the only single and double quote pattern with escaping that works
// with both Fusion/Solr search field and Appkit <search:field> tags
if (i !== leni - 1) {
// add slash to end of all breadcrumb segments
h += "<a href='http://u90405.providence.org:8080/myhiway/#/search?q=pathstring:\"";
h += p;
h += "\"'>" + arrpat[i] + "</a> » ";
} else {
// don't add slash to last segment
h += "<a href='http://u90405.providence.org:8080/myhiway/#/search?q=pathstring:\"";
h += p;
h += "\"'>" + arrpat[i] + "</a>";
}
}
doc.addField('pathstring', revpat);
doc.addField('pathlink', h);
}
logger.info("ENTERING JAVASCRIPT STAGE - FACETS");
// Topic Menu (Galleries)
var arrtop = [];
if (doc.getFirstFieldValue("ows_topic_menu_s")) {
arrtop = cleanSPDelimiters("ows_topic_menu_s", doc).split(";");
for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
doc.addField("topic_menu", arrtop[i]);
}
}
// Term Environment (Reference)
var arrterm = [];
if (doc.getFirstFieldValue("ows_filterbytermenvironment_s")) {
arrterm = cleanSPDelimiters("ows_filterbytermenvironment_s", doc).split(";");
for (var i = 0, leni = arrterm.length; i < leni; i += 1) {
doc.addField("environment", arrterm[i]);
}
}
// entity (Reference)
var arrentity = [];
if (doc.getFirstFieldValue("ows_entity_s")) {
arrentity = cleanSPDelimiters("ows_entity_s", doc).split(";");
for (var i = 0, leni = arrentity.length; i < leni; i += 1) {
doc.addField("entity", arrentity[i]);
}
}
// galleries page tabs
var arrgtab = [];
if (doc.getFirstFieldValue("ows_tab_s")) {
arrgtab = cleanSPDelimiters("ows_tab_s", doc).split(";");
for (var i = 0, leni = arrgtab.length; i < leni; i += 1) {
doc.addField("tab_galleries", arrgtab[i]);
}
}
// Topic Menu (Reference)
var arrtop = [];
if (doc.getFirstFieldValue("ows_topic_s")) {
arrtop = cleanSPDelimiters("ows_topic_s", doc).split(";");
for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
doc.addField("topic_menu", arrtop[i]);
}
}
// Category Menu (Top Reports)
var arrcatm = [];
if (doc.getFirstFieldValue("ows_category_menu_s")) {
arrcatm = cleanSPDelimiters("ows_category_menu_s", doc).split(";");
for (var i = 0, leni = arrcatm.length; i < leni; i += 1) {
var trim = arrcatm[i].replace(/^.*:\s/, "");
doc.addField("category_menu", trim);
}
}
// Category (Reference)
var arrtcat = [];
if (doc.getFirstFieldValue("ows_category_s")) {
arrtcat = cleanSPDelimiters("ows_category_s", doc).split(";");
for (var i = 0, leni = arrtcat.length; i < leni; i += 1) {
doc.addField("training_category", arrtcat[i]);
}
}
// Training Audience (Reference)
var arrtaud = [];
if (doc.getFirstFieldValue("ows_filterbytrainingaudience_s")) {
arrtaud = cleanSPDelimiters("ows_filterbytrainingaudience_s", doc).split(";");
for (var i = 0, leni = arrtaud.length; i < leni; i += 1) {
doc.addField("training_audience", arrtaud[i]);
}
}
// Resource Type (Reference)
var arrres = [];
if (doc.getFirstFieldValue("ows_resourcetype_s")) {
arrres = cleanSPDelimiters("ows_resourcetype_s", doc).split(";");
for (var i = 0, leni = arrres.length; i < leni; i += 1) {
doc.addField("training_type", arrres[i]);
}
}
// Audience (Gallery)
var aud = '',
arraud = [];
if (doc.getFirstFieldValue("ows_audience_s")) {
// from list
arraud = cleanSPDelimiters("ows_audience_s", doc).split(";");
for (var i = 0, leni = arraud.length; i < leni; i += 1) {
var trimaud = arraud[i].replace(/^.*-/, "");
doc.addField("audience", trimaud);
}
}
// UserRole (Gallery - Clinical Operations Kadlec)
var arrusr = [];
if (doc.getFirstFieldValue("ows_user_role_s")) {
// from list
arrusr = cleanSPDelimiters("ows_user_role_s", doc).split(";");
for (var i = 0, leni = arrusr.length; i < leni; i += 1) {
var trimusr = arrusr[i].replace(/^.*:\s/, "");
doc.addField("userrole", trimusr);
}
}
// Groups (Gallery - Clinical Operations Kadlec)
var arrgrp = [];
if (doc.getFirstFieldValue("ows_groups_s")) {
// from list
arrgrp = cleanSPDelimiters("ows_groups_s", doc).split(";");
for (var i = 0, leni = arrgrp.length; i < leni; i += 1) {
var trimgrp = arrgrp[i].replace(/^.*:\s/, "");
doc.addField("groups", trimgrp);
}
}
// Topics Taxonomy (DataCatalog-Products_Reports_***)
var tax = '';
var itm = [];
var heirs = [];
var heirparents = [];
// test, get field, clean
if (doc.getFirstFieldValue("ows_topics_taxonomy_s")) {
tax = cleanSPDelimiters("ows_topics_taxonomy_s", doc);
// split taxonomies string into array of items
itm = tax.split(';');
// process each taxonomy (even if only one)
for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {
// split each taxonomy string into array of levels
var levels = itm[itm_i].split("/");
for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
var level_cnt = 0;
// prepend the correct index and parents for each level.
while (level_cnt < levels_len) {
var hier_val = hier_val + "/" + levels[level_cnt];
var path = level_cnt.toString() + hier_val;
// de-dupe hiers by checking for existence of "path"
if (heirs.indexOf(path) < 0) {
heirs.push(path);
}
// de-dupe parents by checking for existence of "path"
if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
heirparents.push(path);
}
level_cnt += 1;
}
}
}
// add arrays to field
doc.addFields("topics_taxonomy_hier", heirs);
doc.addFields("topics_taxonomy_hier_parent", heirparents);
}
// Location Taxonomy (Reports-ClinicalQuality)
var tax = '';
var itm = [];
var heirs = [];
var heirparents = [];
// test and get field
if (doc.getFirstFieldValue("ows_location_s")) {
tax = doc.getFirstFieldValue("ows_location_s");
// cleanup any multi-choice SharePoint Lookup column delimiters
//tax = tax.replace(/;#[0-9]+/g, "").replace(/;#/g, ";");
// cleanup any multi-choice SharePoint Choice column delimiters
tax = tax.replace(/^;#/, "").replace(/;#$/, "").replace(/;#/g, ";");
// split taxonomies string into array of items
itm = tax.split(';');
// process each taxonomy (even if only one)
for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {
// split each taxonomy string into array of levels
var levels = itm[itm_i].split("|");
for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
var level_cnt = 0;
// prepend the correct index and parents for each level.
while (level_cnt < levels_len) {
var hier_val = hier_val + "/" + levels[level_cnt];
var path = level_cnt.toString() + hier_val;
// de-dupe hiers by checking for existence of "path"
if (heirs.indexOf(path) < 0) {
heirs.push(path);
}
// de-dupe parents by checking for existence of "path"
if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
heirparents.push(path);
}
level_cnt += 1;
}
}
}
// add arrays to field
doc.addFields("location_hier", heirs);
doc.addFields("location_hier_parent", heirparents);
}
return doc;
}
// clean up SharePoint field delimiters
function cleanSPDelimiters(field, doc) {
var x = '';
// get field
x = doc.getFirstFieldValue(field);
// --- lookup field ----------------------------------
x = x.replace(/^[0-9]+;#/, ''); // remove at start
x = x.replace(/;#[0-9]+$/, ''); // remove at end
x = x.replace(/;#[0-9]+;#/g, ';'); // replace middle
// --- choice field ----------------------------------
x = x.replace(/^;#/, ''); // remove at start
x = x.replace(/;#$/, ''); // remove at end
x = x.replace(/;#/g, ';'); // replace middle
// --- hyperlink field -------------------------------
// split string on comma and return left side
x = x.split(',')[0];
// return field
return x;
}
}