Skip to content
24 changes: 3 additions & 21 deletions lib/annotated-marc-serializer.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

const arrayUnique = require('./util').arrayUnique
const relatorMappings = require('../data/relator-mappings.json')
const { varFieldMatches, buildSourceWithMasking } = require('./marc-util')

// Namespace-style class: the body is empty, and functions such as
// varFieldMatches and buildSourceWithMasking are attached to it as static
// properties further down in this file.
class AnnotatedMarcSerializer {
}
Expand Down Expand Up @@ -133,32 +134,13 @@ AnnotatedMarcSerializer.matchingMarcFields = function (bib, rule) {
*
* @return {boolean}
*/
AnnotatedMarcSerializer.varFieldMatches = function (field, rule) {
  // Build the string the rule's regex is written against: the MARC tag
  // followed by both indicators, with a blank indicator rendered as a space.
  const { marcTag, ind1, ind2 } = field
  const firstIndicator = ind1 || ' '
  const secondIndicator = ind2 || ' '
  const tagWithIndicators = `${marcTag}${firstIndicator}${secondIndicator}`

  // Both conditions must hold: the tag+indicators pattern and the field tag.
  if (!rule.marcIndicatorRegExp.test(tagWithIndicators)) return false
  return rule.fieldTag === field.fieldTag
}
AnnotatedMarcSerializer.varFieldMatches = varFieldMatches // shared implementation imported from ./marc-util

/**
* Given a varField, returns a copy with any hidden subfield content replaced
* with "[redacted]" based on given rule
*/
AnnotatedMarcSerializer.buildSourceWithMasking = function (field, rule) {
  // Decides whether a subfield tag must be hidden under the rule:
  //  - 'include' rules hide every tag that is NOT listed
  //  - 'exclude' rules hide every tag that IS listed
  // Any other directive hides nothing.
  const isRedacted = (tag) => {
    const spec = rule.subfieldSpec
    if (spec.directive === 'include') return spec.subfields.indexOf(tag) < 0
    if (spec.directive === 'exclude') return spec.subfields.indexOf(tag) >= 0
    return false
  }

  // Copy each subfield, swapping hidden content for the placeholder
  const maskedSubfields = (field.subfields || []).map((subfield) => {
    const content = isRedacted(subfield.tag) ? '[redacted]' : subfield.content
    return Object.assign({}, subfield, { content })
  })

  // Return a shallow copy of the field; the input field is left untouched
  return Object.assign({}, field, { subfields: maskedSubfields })
}
AnnotatedMarcSerializer.buildSourceWithMasking = buildSourceWithMasking // shared implementation imported from ./marc-util

/**
* Get prefix for a marctag & subfield, given a previous subfield (if avail.)
Expand Down
192 changes: 192 additions & 0 deletions lib/marc-serializer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/**
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're putting the rest of the codebase to shame with all this detailed documentation! Nicely done though it definitely helps make it easy to understand what is going on.

* @typedef {object} MarcRuleSubfieldSpec
* @property {array<string>} subfields - Array of subfields to match for suppression
* @property {string} directive - Indicates whether the matching subfields
* should be "include"d or "exclude"d
*/
/**
* @typedef {object} MarcRule
* @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y')
* @property {RegExp} marcIndicatorRegExp - Regex (stored stringified in the rules
* JSON and compiled when rules are loaded) for matching a VarField tag joined to 1st and 2nd indicators
* @property {MarcRuleSubfieldSpec} subfieldSpec - How to match subfields
* @property {string} directive - Whether to include/exclude if matched.
*/

/**
* @typedef {object} SubField
* @property {string} tag - Identifying tag (e.g. '6', 'a')
* @property {string} content - Value of subfield
*/

/**
* @typedef {object} VarField
* @property {string} marcTag - Three digit number classifying field (e.g. '100')
* @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y')
* @property {string} content - Root level content (usually null/ignored)
* @property {array<SubField>} subfields
* @property {string|null} ind1 - First indicator character (space if blank)
* @property {string|null} ind2 - Second indicator character (space if blank)
*/

/**
* @typedef {object} SerializedBib
* @property {string} id - Bib ID
* @property {string} nyplSource - MARC source
* @property {array<VarField>} fields - Array of varFields after suppression
*/

/**
* @typedef {object} SerializedMarc
* @property {SerializedBib} bib - The serialized bib object containing varFields
*/

const { varFieldMatches, buildSourceWithMasking } = require('./marc-util')

// Namespace-style class: it declares no members of its own; the rules and
// functions below are attached to it as static properties.
class MarcSerializer {}

// Load rules
MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json').map((rule) => {
  // The JSON file stores each pattern as a string; compile it once at load
  // time so rule matching can call .test() directly.
  const marcIndicatorRegExp = new RegExp(rule.marcIndicatorRegExp)
  return Object.assign({}, rule, { marcIndicatorRegExp })
})

/**
* Returns true if a field matches a given MARC rule
* @param {VarField} field - MARC field to test
* @param {MarcRule} rule - Rule to match against
* @returns {boolean}
*/
MarcSerializer.varFieldMatches = varFieldMatches // shared implementation imported from ./marc-util

/**
* Returns a copy of a varField with masked subfields according to the rule
* @param {VarField} field - MARC field to mask
* @param {MarcRule} rule - Rule defining subfields to mask
* @returns {VarField} Masked field
*/
MarcSerializer.buildSourceWithMasking = buildSourceWithMasking // shared implementation imported from ./marc-util

/**
* Check if a field is the LEADER
* @param {VarField} field - Field to check
* @returns {boolean}
*/
MarcSerializer.isLeaderField = function (field) {
  const { fieldTag, marcTag, content } = field

  // A leader is recognised by all three traits together: the '_' field tag,
  // an explicitly null MARC tag, and string content at the root of the field.
  if (fieldTag !== '_') return false
  if (marcTag !== null) return false
  return typeof content === 'string'
}

/**
* Check if a field is a control field (MARC tags 001–009 and no subfields)
* @param {VarField} field - Field to check
* @returns {boolean}
*/
MarcSerializer.isControlField = function (field) {
  // Control fields are the 00X block only (001–009). The previous pattern,
  // /^[0][0-9][0-9]$/, also accepted 000 and 010–099, so a subfield-less 0XX
  // data field would have bypassed rule matching entirely.
  const hasControlTag = Boolean(field.marcTag) && /^00[1-9]$/.test(field.marcTag)

  // Control fields carry their value in `content` and have no subfields
  const hasNoSubfields = !field.subfields || field.subfields.length === 0

  // Always a strict boolean (the previous expression could leak null,
  // undefined or '' when marcTag was falsy)
  return hasControlTag && hasNoSubfields
}

/**
 * Builds a short label for a field: its MARC tag followed by both
 * indicators, with a blank (falsy) indicator rendered as a single space.
 * @param {VarField} field - Field to describe
 * @returns {string} e.g. '2451 ' for tag 245, ind1 '1', blank ind2
 */
MarcSerializer.describeField = function (field) {
  const { marcTag, ind1, ind2 } = field
  const firstIndicator = ind1 || ' '
  const secondIndicator = ind2 || ' '
  return `${marcTag}${firstIndicator}${secondIndicator}`
}

/**
* Finds linked 880 fields (parallel scripts) for a given field
* @param {Bib} bib - Bib object containing varFields
* @param {VarField} sourceField - Field to find parallels for
* @returns {Array<VarField>} Array of parallel 880 fields
*/
MarcSerializer.findParallelFields = function (bib, sourceField) {
  // Linking numbers come from the source field's $6 subfields; a field
  // without any has no parallels, so the bib is not scanned at all.
  const linkNumbers = extractLinkingNumbers(sourceField)

  return linkNumbers.length > 0
    ? bib.varFields.filter((candidate) => isLinked880Field(candidate, linkNumbers))
    : []
}

/**
* Extracts linking numbers from subfield 6, removing the 880- prefix
*/
function extractLinkingNumbers (varField) {
  const subfields = varField.subfields || []

  // Keep only MARC linking subfields ($6) and strip a leading "880-" from
  // their content; any other content is returned unchanged.
  return subfields.reduce((linkingNumbers, subfield) => {
    if (subfield.tag === '6') {
      linkingNumbers.push(subfield.content.replace(/^880-/, ''))
    }
    return linkingNumbers
  }, [])
}

/**
* Determines whether a field is an 880 field linked to any of the given numbers
*/
function isLinked880Field (field, linkNumbers) {
  // Only 880 fields that actually carry subfields can be parallels
  const isCandidate = field.marcTag === '880' && Boolean(field.subfields)
  if (!isCandidate) return false

  // Collect the content of every MARC linking subfield ($6) on the field
  const linkingContents = field.subfields.reduce((contents, subfield) => {
    if (subfield.tag === '6') contents.push(subfield.content)
    return contents
  }, [])

  // Linked when any $6 value matches any of the requested link numbers
  const matchesAnyNumber = (link) =>
    linkNumbers.some((linkNumber) => isMatchingLink(link, linkNumber))

  return linkingContents.some(matchesAnyNumber)
}

/**
* Checks whether a link contains the link number at position 4
*/
function isMatchingLink (link, linkNumber) {
  // Nothing to compare: a missing/non-string value or an empty link number
  // can never identify a parallel field.
  if (typeof link !== 'string' || typeof linkNumber !== 'string' || linkNumber === '') {
    return false
  }

  // An 880's $6 has the shape "<3-char tag>-<occurrence>…", so the occurrence
  // number starts at index 4. The comparison is anchored there rather than
  // using indexOf(), which reports only the FIRST occurrence: for "100-10"
  // and link number "10" it returns 0 (inside the tag), so the link was
  // wrongly rejected.
  return link.startsWith(linkNumber, 4)
}

/**
* Serializes a bib with excluded fields and redacted subfields
* @param {Bib} bib - Bib to serialize
* @returns {SerializedMarc} Serialized bib
*/
MarcSerializer.serialize = function (bib) {
  const varFields = bib.varFields || []

  // Leader and control fields are always passed through untouched
  const isPassThrough = (field) =>
    MarcSerializer.isLeaderField(field) || MarcSerializer.isControlField(field)

  // First rule (in rule-file order) that matches the field, if any
  const findRule = (field) =>
    MarcSerializer.mappingRules.find((rule) => MarcSerializer.varFieldMatches(field, rule))

  // PR review context for the parallel handling below: a reviewer asked
  // whether 880s should simply be rendered as-is (and reported not seeing
  // known 880 values in the `.marc` output for b22144813). The author's stated
  // intent is to suppress the parallels of suppressed fields and pass all
  // other parallels through.
  //
  // Pass 1: resolve each field's own rule, and record for every parallel 880
  // the rule(s) it inherits from the field(s) that link to it. Doing this up
  // front keeps the result independent of the order of fields in the bib and
  // avoids mutating the caller's field objects in place.
  const ownRules = new Map()
  const inheritedRules = new Map()
  varFields.forEach((field) => {
    if (isPassThrough(field)) return

    const rule = findRule(field)
    if (!rule) return
    ownRules.set(field, rule)

    MarcSerializer.findParallelFields(bib, field).forEach((parallel) => {
      const inherited = inheritedRules.get(parallel) || []
      inherited.push(rule)
      inheritedRules.set(parallel, inherited)
    })
  })

  // Pass 2: apply the field's own rule plus any rules inherited from a source
  const serializedVarFields = varFields
    .map((field) => {
      if (isPassThrough(field)) return field

      const ownRule = ownRules.get(field)
      const applicableRules = (ownRule ? [ownRule] : []).concat(inheritedRules.get(field) || [])

      // If no rule applies, leave the field as is
      if (applicableRules.length === 0) return field

      // Field-level exclusion: drop the field when its own rule excludes it,
      // or when it is the parallel of an excluded source field
      if (applicableRules.some((rule) => rule.directive === 'exclude')) return null

      // Subfield-level masking: each call returns a fresh copy, so the input
      // bib is never modified
      return applicableRules.reduce(
        (masked, rule) => MarcSerializer.buildSourceWithMasking(masked, rule),
        field
      )
    })
    // Remove any nulls from excluded fields
    .filter(Boolean)

  return {
    bib: {
      id: bib.id,
      nyplSource: bib.nyplSource,
      fields: serializedVarFields
    }
  }
}

module.exports = MarcSerializer
39 changes: 39 additions & 0 deletions lib/marc-util.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Returns true if a field matches a given MARC rule
* @param {VarField} field
* @param {MarcRule} rule
* @returns {boolean}
*/
function varFieldMatches (field, rule) {
  const { marcTag, ind1, ind2, fieldTag } = field

  // The rule's regex is written against "<marcTag><ind1><ind2>": a missing
  // tag contributes nothing and a blank indicator counts as a single space.
  const tagWithIndicators = [marcTag || '', ind1 || ' ', ind2 || ' '].join('')

  // The cheap field-tag comparison gates the regex test
  if (rule.fieldTag !== fieldTag) return false
  return rule.marcIndicatorRegExp.test(tagWithIndicators)
}

/**
* Returns a copy of a varField with masked subfields according to the rule
* @param {VarField} field
* @param {MarcRule} rule
* @returns {VarField}
*/
function buildSourceWithMasking (field, rule) {
  // Decides whether a subfield tag must be hidden under the rule:
  //  - 'include' rules hide every tag that is NOT listed in the spec
  //  - 'exclude' rules hide every tag that IS listed in the spec
  // Any other directive hides nothing.
  //
  // On the review question of whether a non-listed subfield under an
  // 'include' rule should simply be skipped: it is kept and its content is
  // replaced, so the output still shows that a subfield was present. The
  // replacement is unconditional, so a hidden subfield is marked
  // '[redacted]' even if its original content was empty.
  const isHidden = (tag) => {
    const { directive, subfields: listedTags } = rule.subfieldSpec
    if (directive === 'include') return !listedTags.includes(tag)
    if (directive === 'exclude') return listedTags.includes(tag)
    return false
  }

  // Shallow copies throughout: neither the field nor its subfields are mutated
  return {
    ...field,
    subfields: (field.subfields || []).map((subfield) => ({
      ...subfield,
      content: isHidden(subfield.tag) ? '[redacted]' : subfield.content
    }))
  }
}

module.exports = {
varFieldMatches,
buildSourceWithMasking
}
25 changes: 25 additions & 0 deletions lib/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const AggregationSerializer = require('./jsonld_serializers.js').AggregationSeri
const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer
const LocationLabelUpdater = require('./location_label_updater')
const AnnotatedMarcSerializer = require('./annotated-marc-serializer')
const MarcSerializer = require('./marc-serializer')
const { makeNyplDataApiClient } = require('./data-api-client')
const { IndexSearchError, IndexConnectionError } = require('./errors')

Expand Down Expand Up @@ -231,6 +232,30 @@ module.exports = function (app, _private = null) {
.then(AnnotatedMarcSerializer.serialize)
}

// Get a single raw marc:
app.resources.marc = async function (params, opts) {
  // Translate the discovery id into an nyplSource plus un-prefixed id;
  // a null/undefined split result is treated like an empty one.
  const sourceMapper = await NyplSourceMapper.instance()
  const identifier = sourceMapper.splitIdentifier(params.uri) ?? {}
  const { id, nyplSource } = identifier

  if (!id || !nyplSource) {
    throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`)
  }

  app.logger.debug('Resources#marc', { id, nyplSource })

  const resp = await makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`)

  // The request can succeed without returning a record, so check the payload
  if (!resp.data) {
    throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`)
  }

  return MarcSerializer.serialize(resp.data)
}

function itemsByFilter (filter, opts) {
opts = Object.assign({
_source: null
Expand Down
2 changes: 2 additions & 0 deletions routes/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ module.exports = function (app) {

if (req.params.ext === 'annotated-marc') {
handler = app.resources.annotatedMarc
} else if (req.params.ext === 'marc') {
handler = app.resources.marc
}

return handler(params, { baseUrl: app.baseUrl }, req)
Expand Down
Loading