Fusion Server – Index Pipelines

March 5, 2019

A collection of examples demonstrating how to use a variety of Lucidworks Fusion Index Pipeline stages.

JavaScript Index Stage

The JavaScript Index Stage accepts custom JavaScript processing logic for manipulating pipeline document (doc) values. One way to prove this stage is activated during an indexing job is to have it return a simple line of text to the Fusion 3.x connectors.log or the Fusion 4.2.x api.log file.

JavaScript Stage Log Entry
function (doc) {
    logger.info("ENTERING JAVASCRIPT STAGE");
}
Process a Field

This script shows the syntax for ensuring a value transformation is returned to the Index Pipeline from the JavaScript Index Stage. It makes sure there is a value first, then adds the value to a field in the Solr schema and returns the document to the pipeline.

function (doc) {
    logger.info("ENTERING JAVASCRIPT STAGE");

    if (doc) {
        logger.info("ENTERING JAVASCRIPT STAGE - FIELDS");

        // Primary Practice Area
        if (doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s")) {
            var ppa = doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s");

            doc.addField("primary_practice_area", ppa);
        }
        return doc;
    }
}
Process a Standard Facet

When using JavaScript to process a lot of fields and facets, make them mentally easier to manage by separating them into different JavaScript stages. In this script, a multiple-choice SharePoint field is transformed into an array value for a multi-value strings field in the Solr schema used by a search facet.

function (doc) {
    "use strict";

    if (doc) {
        logger.info("ENTERING JAVASCRIPT STAGE - FACETS");

        // Topic Menu
        var arrtop = [];
        if (doc.getFirstFieldValue("ows_topic_menu_s")) {
            arrtop = doc.getFirstFieldValue("ows_topic_menu_s");

            // this is a SharePoint multi-choice field that needs a bit of cleaning
            arrtop = arrtop.replace(/^;#/, '');
            arrtop = arrtop.replace(/;#$/, '');
            arrtop = arrtop.replace(/;#/g, ';');
            arrtop = arrtop.split(";");

            for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
                doc.addField("topic_menu", arrtop[i]);
            }
        }
        return doc;
    }
}
Process a Hierarchical Facet

App Studio’s hierarchical facets require a child and parent field. This script returns both the child and parent values to the index from a single semicolon-delimited field in a SharePoint list.

function (doc) {
    "use strict";

    // Topics Taxonomy
    var tax = '';
    var itm = [];
    var heirs = [];
    var heirparents = [];

    // test, get field, clean
    if (doc.getFirstFieldValue("ows_topics_taxonomy_s")) {
        tax = doc.getFirstFieldValue("ows_topics_taxonomy_s");

        // split taxonomies string into array of items
        itm = tax.split(';');

        // process each taxonomy (even if only one)
        for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {

            // split each taxonomy string into array of levels
            var levels = itm[itm_i].split("/");

            for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
                var level_cnt = 0;
                var hier_val = "";

                // prepend the correct index and parents for each level.
                while (level_cnt < levels_len) {

                    hier_val = hier_val + "/" + levels[level_cnt];
                    var path = level_cnt.toString() + hier_val;

                    // de-dupe hiers by checking for existence of "path"
                    if (heirs.indexOf(path) < 0) {
                        heirs.push(path);
                    }

                    // de-dupe parents by checking for existence of "path"
                    if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
                        heirparents.push(path);
                    }

                    level_cnt += 1;
                }
            }
        }

        // add arrays to field
        doc.addFields("topics_taxonomy_hier", heirs);
        doc.addFields("topics_taxonomy_hier_parent", heirparents);
    }
}
Reference a File of JavaScript Utility Functions

One of the best ways to simplify the code in a JavaScript Index Stage is to place repeating code in a function stored as a file on the Fusion Server and then refer to this function from the code in the stage. To do this, create a file that contains a JavaScript function (e.g. javascript_utils.js), and upload this file to the $FUSION_HOME/scripts folder (e.g. /opt/fusion/3.0.1/scripts/javascript_utils.js). The following example contains a single function that cleans up all SharePoint Lookup, Choice and Hyperlink fields:

// Clean up SharePoint field delimiters.
//
// Reads the first value of `field` from `doc` (via doc.getFirstFieldValue)
// and strips the ";#" markup of SharePoint Lookup and Choice fields, leaving
// the individual values separated by ";". Finally keeps only the text before
// the first comma, which is how Hyperlink fields are reduced to their left
// side.
//
// Caveat: the comma rule is applied to every field, so any value that
// contains a comma is truncated at that comma.
//
// Returns the cleaned string, or '' when the field has no value.
function cleanSPDelimiters(field, doc) {
    var raw = doc.getFirstFieldValue(field);

    // A missing field yields an empty string instead of a TypeError.
    if (raw === null || raw === undefined) {
        return '';
    }

    // String() lets non-string values (e.g. numbers) go through the same path.
    return String(raw)
        // --- lookup field ----------------------------------
        .replace(/^[0-9]+;#/, '')     // remove at start
        .replace(/;#[0-9]+$/, '')     // remove at end
        .replace(/;#[0-9]+;#/g, ';')  // replace middle
        // --- choice field ----------------------------------
        .replace(/^;#/, '')           // remove at start
        .replace(/;#$/, '')           // remove at end
        .replace(/;#/g, ';')          // replace middle
        // --- hyperlink field -------------------------------
        // split string on comma and keep the left side
        .split(',')[0];
}

After placing the file on the server, refer to it from the JavaScript Index Stage with a load declaration, and then call the function(s) directly. Here’s how a single function simplifies the cleanup process of the Facets example above:

function (doc) {
    "use strict";

    // load PSJH utilities;
    load(java.lang.System.getProperty('apollo.home') + '/scripts/javascript_utils.js');

    if (doc) {
        logger.info("ENTERING JAVASCRIPT STAGE - FACETS");

        // Topic Menu (Galleries)
        var arrtop = [];
        if (doc.getFirstFieldValue("ows_topic_menu_s")) {
            arrtop = cleanSPDelimiters("ows_topic_menu_s", doc).split(";");

            for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
                doc.addField("topic_menu", arrtop[i]);
            }
        }

        return doc;
    }
}
Clean Up Special Characters in Document ID

In Fusion 3.0.1, the Solr Partial Update Indexer Stage adds two backslashes to a document id that contains special characters. The resulting id has a residual backslash, which prevents it from matching up with the id already in the index. This workaround uses a JavaScript Index Stage to escape the special characters prior to the Solr Partial Update Indexer Stage.

function(doc) {
	if (doc.getId() !== null) {
		// get the ID
		var new_id = doc.getId();
		// escape dashes
		new_id = new_id.replace(/-/g,"\-");
		// change the id field
		doc.setId(new_id);
	}
	return doc;
}
;

Related

Managed JavaScript in Index/Query Pipelines

The Managed JavaScript Stage arrived on the scene with Fusion 4.1.1. This release leveraged the Fusion Server blob store and made it easy to manage ALL JavaScript functions from outside of the Fusion Server.

Managed JavaScript Log Entry

A simple way to prove how this stage works is to create and upload a simple JavaScript file (e.g. managed.js) to the Fusion blob store. Include a logger.info statement with a new line expression that makes finding this stage in the log files easier when troubleshooting.

function (doc) {
    logger.info("\n>>>>> Managed JavaScript Index Stage");
}

Then add a Managed JavaScript Stage to the Index Pipeline and refer to this file in the Script Reference field with the phrase urn:x-fusion:blob:managed.js. Note this is as simple as adding the file name to the end of a “urn:x-fusion:blob:” string. Finally, run a connector job using the Index Pipeline and check for the resulting log message in the Fusion Server’s api.log file.

Managed JavaScript Stage setting example.
Upload Code to Blob Store Automatically

Uploading JavaScript files from a desktop device to the blob store can be done automatically with a simple keyboard shortcut in a Visual Studio Code editor. To try it out, download and install Visual Studio Code, then find and enable the REST Client for Visual Studio Code extension. Be sure to review the REST Client extension Usage section before proceeding for best results.

When ready, add the following variables and PUT statement as a remarked section to the top of a JavaScript file (e.g. managed.js). It should look something like the following:

// Send Request
/*
@fusion_host = 10.11.12.13
@fusion_port = 8764
@basic_auth = Basic admin password
@app_name = myApp
@file_name = managed.js
@res_type = js-index

PUT http://{{fusion_host}}:{{fusion_port}}/api/apps/{{app_name}}/blobs/{{file_name}}
?resourceType=file:{{res_type}}
Content-Type: text/javascript
Authorization: {{basic_auth}}

< ./{{file_name}}
*/

function (doc) {
    logger.info("\n>>>>> Managed JavaScript Index Stage - test");
}

Adjust the fusion_host, basic_auth and app_name variables for the destination Fusion Search system, of course, and be sure to leave an empty line above and below the PUT statement.

To send this file to the blob store, highlight the content between the comment start/end lines, press the F1 key to bring up the command line input box, type the word “Rest” in the search box and select the “Rest Client: Send Request” option from the suggestions list. On successive run attempts, you may be able to right-click on those selected lines and choose “Send Request” from the context menu to send the request. If successful, a Response panel should appear on the right that begins with an HTTP/1.1 200 OK.

When updating JS code in an existing blob file that is used by an existing Index Pipeline, be sure to refresh/rebuild the pipeline before running a data connector job. This can be done by modifying and saving the pipeline from the UI or by sending a cURL command to the API like curl -u admin:password -X PUT http://localhost:8764/api/index-pipelines/index_document_pipeline/refresh.

Example Managed JavaScript File

The code below is an example of a JavaScript file that contains all field, facet and utility functions needed for transforming SharePoint List/Library fields in a particular setting. It is referred to as urn:x-fusion:blob:psjhUtils.js from a single Managed JavaScript Stage in an Index Pipeline.

/*
@fusion_host = 10.11.12.13
@fusion_port = 8764
@basic_auth = Basic admin password
@app_name = myApp
@file_name = psjhUtils.js
@res_type = js-index

PUT http://{{fusion_host}}:{{fusion_port}}/api/apps/{{app_name}}/blobs/{{file_name}}
?resourceType=file:{{res_type}}
Content-Type: text/javascript
Authorization: {{basic_auth}}

< ./{{file_name}}
*/

function (doc) {
    logger.info("\n>>>>> Managed JavaScript Index Stage");

    if (doc) {
        // PSJH Measures		
        var arrpsjh = [];
        if (doc.getFirstFieldValue("ows_filterbypsjhmeasures_s")) {
            arrpsjh = cleanSPDelimiters("ows_filterbypsjhmeasures_s", doc).split(";");
            for (var i = 0, leni = arrpsjh.length; i < leni; i += 1) {
                doc.addField("psjh_measure", arrpsjh[i]);
            }
        }

        // Quality Strategy Domain
        var arrgsd = [];
        if (doc.getFirstFieldValue("ows_filterbyqualitystrategydomain_s")) {
            arrgsd = cleanSPDelimiters("ows_filterbyqualitystrategydomain_s", doc).split(";");

            for (var i = 0, leni = arrgsd.length; i < leni; i += 1) {
                doc.addField("quality_strategy_domain", arrgsd[i]);
            }
        }

        // Primary Practice Area
        var arrppa = [];
        if (doc.getFirstFieldValue("ows_filterbyprimarypracticearea_s")) {
            arrppa = cleanSPDelimiters("ows_filterbyprimarypracticearea_s", doc).split(";");

            for (var i = 0, leni = arrppa.length; i < leni; i += 1) {
                doc.addField("primary_practice_area", arrppa[i]);
            }
        }

        // Strategic Programs
        var arrstp = [];
        if (doc.getFirstFieldValue("ows_filterbystrategicprogram_s")) {
            arrstp = cleanSPDelimiters("ows_filterbystrategicprogram_s", doc).split(";");

            for (var i = 0, leni = arrstp.length; i < leni; i += 1) {
                doc.addField("strategic_program", arrstp[i]);
            }
        }

        // Resource_Type (Reference)
        var res = [];
        if (doc.getFirstFieldValue("ows_resourcetype_s")) {
            res = cleanSPDelimiters("ows_resourcetype_s", doc).split(";");

            for (var i = 0, leni = res.length; i < leni; i += 1) {
                doc.addField("resource_type", res[i]);
            }
        }

        // Relationships
        // Note: the "s" captures fields that are purely numbers ONLY where there are multiple
        // values and only if the first value is is a true string. The "l" captures fields 
        // that are purely numbers (or multiple fields where the first field is all numbers).

        var rel = "";
        // reports
        if (doc.getFirstFieldValue("ows_relatedproductidtext_s")) {
            rel = doc.getFirstFieldValue("ows_relatedproductidtext_s");
            rel = rel.replace(/#/g, "").replace(/;/g, " OR ");
            doc.addField("relationship", rel);
        }
        if (doc.getFirstFieldValue("ows_relatedproductidtext_l")) {
            rel = doc.getFirstFieldValue("ows_relatedproductidtext_l");
            rel = rel.replace(/#/g, "").replace(/;/g, " OR ");
            doc.addField("relationship", rel);
        }

        // Path String and Breadcrumb
        var pat = '';
        var revpat = '';
        var arrpat = [];
        var h = '';
        var p = '';

        if (doc.getFirstFieldValue('ows_path_s')) {
            pat = doc.getFirstFieldValue('ows_path_s');

            // generate pathstring that uses forward slashes
            revpat = pat.replace(/\\/g, '/');

            // generate pathlink from array
            arrpat = pat.split('\\');
            for (var i = 0, leni = arrpat.length; i < leni; i += 1) {

                // incrementally build new path
                if (i === 0) {
                    // first value only
                    p = arrpat[i];
                } else {
                    // all other values
                    p = p + '/' + arrpat[i];
                }

                // incrementally build new breadcrumb that includes path
                // NOTE: this is the only single and double quote pattern with escaping that works
                // with both Fusion/Solr search field and Appkit <search:field> tags
                if (i !== leni - 1) {
                    // add slash to end of all breadcrumb segments
                    h += "<a href='http://u90405.providence.org:8080/myhiway/#/search?q=pathstring:\"";
                    h += p;
                    h += "\"'>" + arrpat[i] + "</a> ยป ";
                } else {
                    // don't add slash to last segment
                    h += "<a href='http://u90405.providence.org:8080/myhiway/#/search?q=pathstring:\"";
                    h += p;
                    h += "\"'>" + arrpat[i] + "</a>";
                }
            }

            doc.addField('pathstring', revpat);
            doc.addField('pathlink', h);
        }

        logger.info("ENTERING JAVASCRIPT STAGE - FACETS");

        // Topic Menu (Galleries)
        var arrtop = [];
        if (doc.getFirstFieldValue("ows_topic_menu_s")) {
            arrtop = cleanSPDelimiters("ows_topic_menu_s", doc).split(";");

            for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
                doc.addField("topic_menu", arrtop[i]);
            }
        }

        // Term Environment (Reference)
        var arrterm = [];
        if (doc.getFirstFieldValue("ows_filterbytermenvironment_s")) {
            arrterm = cleanSPDelimiters("ows_filterbytermenvironment_s", doc).split(";");

            for (var i = 0, leni = arrterm.length; i < leni; i += 1) {
                doc.addField("environment", arrterm[i]);
            }
        }

        // entity (Reference)
        var arrentity = [];
        if (doc.getFirstFieldValue("ows_entity_s")) {
            arrentity = cleanSPDelimiters("ows_entity_s", doc).split(";");

            for (var i = 0, leni = arrentity.length; i < leni; i += 1) {
                doc.addField("entity", arrentity[i]);
            }
        }

        // galleries page tabs
        var arrgtab = [];
        if (doc.getFirstFieldValue("ows_tab_s")) {
            arrgtab = cleanSPDelimiters("ows_tab_s", doc).split(";");

            for (var i = 0, leni = arrgtab.length; i < leni; i += 1) {
                doc.addField("tab_galleries", arrgtab[i]);
            }
        }

        // Topic Menu (Reference)
        var arrtop = [];
        if (doc.getFirstFieldValue("ows_topic_s")) {
            arrtop = cleanSPDelimiters("ows_topic_s", doc).split(";");

            for (var i = 0, leni = arrtop.length; i < leni; i += 1) {
                doc.addField("topic_menu", arrtop[i]);
            }
        }

        // Category Menu (Top Reports)
        var arrcatm = [];
        if (doc.getFirstFieldValue("ows_category_menu_s")) {
            arrcatm = cleanSPDelimiters("ows_category_menu_s", doc).split(";");

            for (var i = 0, leni = arrcatm.length; i < leni; i += 1) {
                var trim = arrcatm[i].replace(/^.*:\s/, "");
                doc.addField("category_menu", trim);
            }
        }

        // Category (Reference)
        var arrtcat = [];
        if (doc.getFirstFieldValue("ows_category_s")) {
            arrtcat = cleanSPDelimiters("ows_category_s", doc).split(";");

            for (var i = 0, leni = arrtcat.length; i < leni; i += 1) {
                doc.addField("training_category", arrtcat[i]);
            }
        }

        // Training Audience (Reference)
        var arrtaud = [];
        if (doc.getFirstFieldValue("ows_filterbytrainingaudience_s")) {
            arrtaud = cleanSPDelimiters("ows_filterbytrainingaudience_s", doc).split(";");

            for (var i = 0, leni = arrtaud.length; i < leni; i += 1) {
                doc.addField("training_audience", arrtaud[i]);
            }
        }


        // Resource Type (Reference)
        var arrres = [];
        if (doc.getFirstFieldValue("ows_resourcetype_s")) {
            arrres = cleanSPDelimiters("ows_resourcetype_s", doc).split(";");

            for (var i = 0, leni = arrres.length; i < leni; i += 1) {
                doc.addField("training_type", arrres[i]);
            }
        }

        // Audience (Gallery)
        var aud = '',
            arraud = [];
        if (doc.getFirstFieldValue("ows_audience_s")) {

            // from list
            arraud = cleanSPDelimiters("ows_audience_s", doc).split(";");

            for (var i = 0, leni = arraud.length; i < leni; i += 1) {
                var trimaud = arraud[i].replace(/^.*-/, "");
                doc.addField("audience", trimaud);
            }
        }

        // UserRole (Gallery - Clinical Operations Kadlec)
        var arrusr = [];
        if (doc.getFirstFieldValue("ows_user_role_s")) {

            // from list
            arrusr = cleanSPDelimiters("ows_user_role_s", doc).split(";");

            for (var i = 0, leni = arrusr.length; i < leni; i += 1) {
                var trimusr = arrusr[i].replace(/^.*:\s/, "");
                doc.addField("userrole", trimusr);
            }
        }

        // Groups (Gallery - Clinical Operations Kadlec)
        var arrgrp = [];
        if (doc.getFirstFieldValue("ows_groups_s")) {

            // from list
            arrgrp = cleanSPDelimiters("ows_groups_s", doc).split(";");

            for (var i = 0, leni = arrgrp.length; i < leni; i += 1) {
                var trimgrp = arrgrp[i].replace(/^.*:\s/, "");
                doc.addField("groups", trimgrp);
            }
        }

        // Topics Taxonomy (DataCatalog-Products_Reports_***)
        var tax = '';
        var itm = [];
        var heirs = [];
        var heirparents = [];

        // test, get field, clean
        if (doc.getFirstFieldValue("ows_topics_taxonomy_s")) {
            tax = cleanSPDelimiters("ows_topics_taxonomy_s", doc);

            // split taxonomies string into array of items
            itm = tax.split(';');

            // process each taxonomy (even if only one)
            for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {

                // split each taxonomy string into array of levels
                var levels = itm[itm_i].split("/");

                for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
                    var level_cnt = 0;

                    // prepend the correct index and parents for each level.
                    while (level_cnt < levels_len) {

                        var hier_val = hier_val + "/" + levels[level_cnt];
                        var path = level_cnt.toString() + hier_val;

                        // de-dupe hiers by checking for existence of "path"
                        if (heirs.indexOf(path) < 0) {
                            heirs.push(path);
                        }

                        // de-dupe parents by checking for existence of "path"
                        if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
                            heirparents.push(path);
                        }

                        level_cnt += 1;
                    }
                }
            }

            // add arrays to field
            doc.addFields("topics_taxonomy_hier", heirs);
            doc.addFields("topics_taxonomy_hier_parent", heirparents);
        }

        // Location Taxonomy (Reports-ClinicalQuality)
        var tax = '';
        var itm = [];
        var heirs = [];
        var heirparents = [];

        // test and get field
        if (doc.getFirstFieldValue("ows_location_s")) {
            tax = doc.getFirstFieldValue("ows_location_s");

            // cleanup any multi-choice SharePoint Lookup column delimiters
            //tax = tax.replace(/;#[0-9]+/g, "").replace(/;#/g, ";");

            // cleanup any multi-choice SharePoint Choice column delimiters
            tax = tax.replace(/^;#/, "").replace(/;#$/, "").replace(/;#/g, ";");

            // split taxonomies string into array of items
            itm = tax.split(';');

            // process each taxonomy (even if only one)
            for (var itm_i = 0, itm_len = itm.length; itm_i < itm_len; itm_i++) {

                // split each taxonomy string into array of levels
                var levels = itm[itm_i].split("|");

                for (var levels_i = 0, levels_len = levels.length; levels_i < levels_len; levels_i++) {
                    var level_cnt = 0;

                    // prepend the correct index and parents for each level.
                    while (level_cnt < levels_len) {

                        var hier_val = hier_val + "/" + levels[level_cnt];
                        var path = level_cnt.toString() + hier_val;

                        // de-dupe hiers by checking for existence of "path"
                        if (heirs.indexOf(path) < 0) {
                            heirs.push(path);
                        }

                        // de-dupe parents by checking for existence of "path"
                        if (level_cnt < levels_len - 1 && heirparents.indexOf(path) < 0) {
                            heirparents.push(path);
                        }

                        level_cnt += 1;
                    }
                }
            }

            // add arrays to field
            doc.addFields("location_hier", heirs);
            doc.addFields("location_hier_parent", heirparents);
        }

        return doc;
    }

    // clean up SharePoint field delimiters
    function cleanSPDelimiters(field, doc) {
        var x = '';

        // get field
        x = doc.getFirstFieldValue(field);

        // --- lookup field ----------------------------------
        x = x.replace(/^[0-9]+;#/, ''); // remove at start
        x = x.replace(/;#[0-9]+$/, ''); // remove at end
        x = x.replace(/;#[0-9]+;#/g, ';'); // replace middle

        // --- choice field ----------------------------------
        x = x.replace(/^;#/, ''); // remove at start
        x = x.replace(/;#$/, ''); // remove at end
        x = x.replace(/;#/g, ';'); // replace middle

        // --- hyperlink field -------------------------------
        // split string on comma and return left side
        x = x.split(',')[0];

        // return field
        return x;
    }
}
  Jay Hill – Lucidworks

Leave a Reply

Your email address will not be published. Required fields are marked *