-
Notifications
You must be signed in to change notification settings - Fork 0
SCC-5105: MARC endpoint #608
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1b60095
d056e5e
56adb1a
51a71bf
e17ec22
a999561
b60340c
5ba17c6
a3e86b0
393cbed
90a50f3
145a2cd
3ebf6a1
61b57eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,192 @@ | ||
| /** | ||
| * @typedef {object} MarcRuleSubfieldSpec | ||
| * @property {array<string>} subfields - Array of subfields to match for suppression | ||
| * @property {string} directive - Indicates whether the matching subfields | ||
| * should be "include"d or "exclude"d | ||
| */ | ||
| /** | ||
| * @typedef {object} MarcRule | ||
| * @property {string} fieldTag - Single-character tag broadly classifying the field (e.g. 'y') | ||
| * @property {string} marcIndicatorRegExp - Stringified regex for matching a | ||
| * VarField tag joined to 1st and 2nd indicators | ||
| * @property {MarcRuleSubfieldSpec} subfieldSpec - How to match subfields | ||
| * @property {string} directive - Whether to include/exclude if matched. | ||
| */ | ||
|
|
||
| /** | ||
| * @typedef {object} SubField | ||
| * @property {string} tag - Identifying tag (e.g. '6', 'a') | ||
| * @property {string} content - Value of subfield | ||
| */ | ||
|
|
||
| /** | ||
| * @typedef {object} VarField | ||
| * @property {string} marcTag - Three-digit number classifying the field (e.g. '100') | ||
| * @property {string} fieldTag - Single-character tag broadly classifying the field (e.g. 'y') | ||
| * @property {string} content - Root level content (usually null/ignored) | ||
| * @property {array<SubField>} subfields | ||
| * @property {string|null} ind1 - First indicator character (space if blank) | ||
| * @property {string|null} ind2 - Second indicator character (space if blank) | ||
| */ | ||
|
|
||
| /** | ||
| * @typedef {object} SerializedBib | ||
| * @property {string} id - Bib ID | ||
| * @property {string} nyplSource - MARC source | ||
| * @property {array<VarField>} fields - Array of varFields after suppression | ||
| */ | ||
|
|
||
| /** | ||
| * @typedef {object} SerializedMarc | ||
| * @property {SerializedBib} bib - The serialized bib object containing varFields | ||
| */ | ||
|
|
||
| const { varFieldMatches, buildSourceWithMasking } = require('./marc-util') | ||
|
|
||
// Empty class used as a holder: every member below is attached as a static property
class MarcSerializer {}

// Load the rule set once, turning each rule's stringified indicator pattern
// into a real RegExp so matching does not have to recompile it per field
MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json')
  .map((rule) => ({
    ...rule,
    marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp)
  }))

/**
 * Returns true if a field matches a given MARC rule
 * (re-exposed from ./marc-util)
 * @param {VarField} field - MARC field to test
 * @param {MarcRule} rule - Rule to match against
 * @returns {boolean}
 */
MarcSerializer.varFieldMatches = varFieldMatches

/**
 * Returns a copy of a varField with masked subfields according to the rule
 * (re-exposed from ./marc-util)
 * @param {VarField} field - MARC field to mask
 * @param {MarcRule} rule - Rule defining subfields to mask
 * @returns {VarField} Masked field
 */
MarcSerializer.buildSourceWithMasking = buildSourceWithMasking
|
|
||
/**
 * Check if a field is the LEADER: field tag '_', a null MARC tag, and
 * string content at the root of the field
 * @param {VarField} field - Field to check
 * @returns {boolean}
 */
MarcSerializer.isLeaderField = function (field) {
  const { fieldTag, marcTag, content } = field
  if (fieldTag !== '_') return false
  if (marcTag !== null) return false
  return typeof content === 'string'
}
|
|
||
/**
 * Check if a field is a control field (MARC tags 001–009 and no subfields)
 * @param {VarField} field - Field to check
 * @returns {boolean} True only for a 001–009 tag carrying no subfields
 */
MarcSerializer.isControlField = function (field) {
  // Only 001–009 qualify. A looser 0XX pattern would also accept 010–099,
  // letting subfield-less fields in that range bypass rule matching.
  const hasControlTag = typeof field.marcTag === 'string' && /^00[1-9]$/.test(field.marcTag)
  const hasNoSubfields = !field.subfields || field.subfields.length === 0
  // Both operands are booleans, so the result is a strict boolean as documented
  return hasControlTag && hasNoSubfields
}
|
|
||
/**
 * Builds a short label for a field: its MARC tag followed by both indicators,
 * with a space standing in for any blank indicator
 * @param {VarField} field - Field to describe
 * @returns {string}
 */
MarcSerializer.describeField = function (field) {
  const firstIndicator = field.ind1 || ' '
  const secondIndicator = field.ind2 || ' '
  return `${field.marcTag}${firstIndicator}${secondIndicator}`
}
|
|
||
/**
 * Finds linked 880 fields (parallel scripts) for a given field
 * @param {Bib} bib - Bib object containing varFields
 * @param {VarField} sourceField - Field to find parallels for
 * @returns {Array<VarField>} Array of parallel 880 fields (empty when the
 *   source field carries no linking numbers)
 */
MarcSerializer.findParallelFields = function (bib, sourceField) {
  const linkNumbers = extractLinkingNumbers(sourceField)
  return linkNumbers.length > 0
    ? bib.varFields.filter((candidate) => isLinked880Field(candidate, linkNumbers))
    : []
}
|
|
||
/**
 * Extracts linking numbers from a field's $6 subfields, removing the 880- prefix
 * @param {VarField} varField - Field whose linking subfields are read
 * @returns {Array<string>} Link numbers; empty when the field has no usable $6
 */
function extractLinkingNumbers (varField) {
  const subfields = varField.subfields || []
  return subfields
    // Keep only MARC linking subfields ($6) that actually carry text; a $6
    // with null/missing content would otherwise throw on .replace below
    .filter((subfield) => subfield.tag === '6' && typeof subfield.content === 'string')
    .map((subfield) => subfield.content.replace(/^880-/, ''))
}
|
|
||
/**
 * Determines whether a field is an 880 field linked to any of the given numbers
 * @param {VarField} field - Candidate field
 * @param {Array<string>} linkNumbers - Link numbers taken from the source field
 * @returns {boolean}
 */
function isLinked880Field (field, linkNumbers) {
  const isParallelCandidate = field.marcTag === '880' && Boolean(field.subfields)
  if (!isParallelCandidate) return false

  // Linked when any MARC linking subfield ($6) matches any requested number
  return field.subfields.some((subfield) =>
    subfield.tag === '6' &&
    linkNumbers.some((linkNumber) => isMatchingLink(subfield.content, linkNumber))
  )
}
|
|
||
/**
 * Checks whether a link carries the link number starting exactly at position 4
 * (presumably right after a three-character tag and hyphen, as in '245-01')
 * @param {string} link - $6 content of a candidate 880 field
 * @param {string} linkNumber - Link number taken from the source field's $6
 * @returns {boolean}
 */
function isMatchingLink (link, linkNumber) {
  if (typeof link !== 'string' || typeof linkNumber !== 'string') return false
  // An empty link number cannot identify a parallel field
  if (linkNumber.length === 0) return false
  // Anchor the comparison at offset 4. A first-occurrence search would stop at
  // an earlier hit (e.g. the '01' inside the tag of '501-01') and wrongly
  // report no match.
  return link.startsWith(linkNumber, 4)
}
|
|
||
| /** | ||
| * Serializes a bib with excluded fields and redacted subfields | ||
| * @param {Bib} bib - Bib to serialize | ||
| * @returns {SerializedMarc} Serialized bib | ||
| */ | ||
| MarcSerializer.serialize = function (bib) { | ||
| const serializedVarFields = bib.varFields | ||
| .map((field) => { | ||
| // Pass leader through | ||
| if (MarcSerializer.isLeaderField(field)) return field | ||
|
|
||
| // Pass control fields through | ||
| if (MarcSerializer.isControlField(field)) return field | ||
|
|
||
| // Find matching rule | ||
| const matchingRule = MarcSerializer.mappingRules.find((rule) => | ||
| MarcSerializer.varFieldMatches(field, rule) | ||
| ) | ||
|
|
||
| // If no rule, leave as is | ||
| if (!matchingRule) return field | ||
|
|
||
| // Handle field-level exclusion | ||
| if (matchingRule.directive === 'exclude') { | ||
| return null | ||
| } | ||
|
|
||
| // Mask field according to rule (handles subfield-level include/exclude) | ||
| const maskedField = MarcSerializer.buildSourceWithMasking(field, matchingRule) | ||
|
|
||
| // Handle parallel 880 fields | ||
| const parallels = MarcSerializer.findParallelFields(bib, field) | ||
| parallels.forEach((p) => { | ||
| Object.assign(p, MarcSerializer.buildSourceWithMasking(p, matchingRule)) | ||
| }) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this an effort to link parallel fields to primary fields so that they can be visually linked on the front-end? Aren't we just rendering 880s as is? Incidentally I'm not seeing known 880 values come through when viewing
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah this may be wrong then– I'm trying to find the parallels of suppressed fields and suppress them, but otherwise pass parallels through |
||
|
|
||
| return maskedField | ||
| }) | ||
| // Remove any nulls from excluded fields | ||
| .filter(Boolean) | ||
|
|
||
| return { | ||
| bib: { | ||
| id: bib.id, | ||
| nyplSource: bib.nyplSource, | ||
| fields: serializedVarFields | ||
| } | ||
| } | ||
| } | ||
|
|
||
| module.exports = MarcSerializer | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
/**
 * Returns true if a field matches a given MARC rule: the field tags must be
 * equal and the rule's pattern must match the MARC tag joined to both
 * indicators (blank indicators are represented by a space)
 * @param {VarField} field
 * @param {MarcRule} rule
 * @returns {boolean}
 */
function varFieldMatches (field, rule) {
  if (rule.fieldTag !== field.fieldTag) return false

  const tagWithIndicators = [
    field.marcTag || '',
    field.ind1 || ' ',
    field.ind2 || ' '
  ].join('')
  return rule.marcIndicatorRegExp.test(tagWithIndicators)
}
|
|
||
| /** | ||
| * Returns a copy of a varField with masked subfields according to the rule | ||
| * @param {VarField} field | ||
| * @param {MarcRule} rule | ||
| * @returns {VarField} | ||
| */ | ||
| function buildSourceWithMasking (field, rule) { | ||
| return { | ||
| ...field, | ||
| subfields: (field.subfields || []).map((subfield) => { | ||
| let content = subfield.content | ||
| if ( | ||
| (rule.subfieldSpec.directive === 'include' && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is in the original code, but do we know why this is logically here? If the directive says include but it is not present in the subfield spec, wouldn't we just skip it? Wouldn't this add |
||
| !rule.subfieldSpec.subfields.includes(subfield.tag)) || | ||
| (rule.subfieldSpec.directive === 'exclude' && | ||
| rule.subfieldSpec.subfields.includes(subfield.tag)) | ||
| ) { | ||
| content = '[redacted]' | ||
| } | ||
| return { ...subfield, content } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| module.exports = { | ||
| varFieldMatches, | ||
| buildSourceWithMasking | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're putting the rest of the codebase to shame with all this detailed documentation! Nicely done, though — it definitely makes it easy to understand what is going on.