Module:PsalmTable/Aligned
From Psalms: Layer by Layer
Documentation for this module may be created at Module:PsalmTable/Aligned/doc
-- Export table; returned at the end of the module.
local p = {}
-- Module-level settings. They are reassigned from the invocation arguments
-- by loadFiles and internalBuildAlignedHebrewEnglishTable on each call.
-- quiet: when true, load failures and render errors yield "" instead of a message.
local quiet = false
-- chapter: used to build "Data/<chapter>/..." page titles. Starts as the
-- number 200; loadFiles replaces it with a trimmed string (default "200").
local chapter = 200
-- separatePunctuation: when true, a token's `after` text is emitted outside
-- its <span>; when false it is placed inside the span, after the word.
local separatePunctuation = true
-- skipHeader: when true, the "! Hebrew !! Verse !! English" header row is omitted.
local skipHeader = false
-- tableClass: class attribute of the generated table; an empty value is
-- replaced by "wikitable psalm-table" at render time.
local tableClass = ""
-- ===== Debug helpers =====
-- NOTE: this local shadows the name of Lua's standard `debug` library for the
-- rest of the file. When true, dbg() records messages and the rendered output
-- is prefixed with the accumulated log.
local debug = false
-- Accumulated log lines. Some helpers (e.g. applyVerseFilter) append here
-- even when `debug` is false.
local debugLog = {}
-- Append a string.format-style message to debugLog, but only while the
-- module-level `debug` flag is set. The format call is skipped otherwise.
local function dbg(fmt, ...)
    if not debug then
        return
    end
    local message = string.format(fmt, ...)
    table.insert(debugLog, message)
end
-- Canonicalise a Hebrew token ID.
--   * a bare "1-2-3" gains the "hb-" prefix ("hb-1-2-3");
--   * any other non-blank string is returned trimmed and otherwise unchanged;
--   * non-strings and blank strings yield nil.
local function canonHb(id)
    if type(id) == "string" then
        local trimmed = mw.text.trim(id)
        if trimmed ~= "" then
            if string.find(trimmed, "^%d+%-%d+%-%d+$") then
                return "hb-" .. trimmed
            end
            return trimmed
        end
    end
    return nil
end
-- Split a verse key into (number, letter-suffix) for sorting, so that
-- "1" < "1a" < "1b" < "2". All whitespace is removed first.
-- Keys that are not "<digits><letters>" (including nil) parse as 0, "".
local function parseVerseKey(key)
    local compact = string.gsub(tostring(key or ""), "%s+", "")
    local digits, letters = string.match(compact, "^(%d+)([a-zA-Z]*)$")
    if not digits then
        return 0, ""
    end
    return tonumber(digits), letters
end
-- Given a whitespace-separated list of hbIDs, canonicalise each one and
-- return the smallest position found for any of them in idxMap
-- (hbID -> index). Returns nil when none of the IDs is present.
local function minHbIndexOf(ids, idxMap)
    local lowest = nil
    local text = tostring(ids or "")
    for token in string.gmatch(text, "%S+") do
        local canonical = canonHb(token)
        if canonical then
            local position = idxMap[canonical]
            if position and (lowest == nil or position < lowest) then
                lowest = position
            end
        end
    end
    return lowest
end
-- Filter rendered rows by their `.label`.
--
-- data: sequence of row tables, each with a `label` (string or number).
-- args: argument table; reads "Verse", "MinVerse" and "MaxVerse".
--
--   * Verse = "2"   keeps every row whose label starts with the number 2
--                   ("2", "2a", "2b", ...).
--   * Verse = "2a"  (or any value that is not a plain number) keeps only rows
--                   whose label equals the value exactly.
--   * MinVerse + MaxVerse (both required) keep an inclusive range; a purely
--     numeric MaxVerse also covers its lettered sub-lines ("2" → up to "2z").
--   * Verse takes precedence over the range.
--
-- Blank or whitespace-only arguments count as "not supplied", so a template
-- passing `Verse={{{Verse|}}}` no longer filters out every row. Labels are
-- compared through tostring(), so numeric labels coming from JSON are
-- matched instead of raising "attempt to index a number value".
--
-- Returns `data` itself when no filter applies, a new sequence otherwise, and
-- {} when `data` is not a table. Always appends a status line to debugLog.
local function applyVerseFilter(data, args)
    if type(data) ~= "table" then
        table.insert(debugLog, "❌ applyVerseFilter: expected table, got " .. tostring(type(data)))
        return {}
    end
    args = args or {}

    -- Trim an argument; nil and blank values both become nil.
    local function cleanArg(value)
        if value == nil then return nil end
        local text = tostring(value):match("^%s*(.-)%s*$")
        if text == "" then return nil end
        return text
    end

    local verseFilter = cleanArg(args["Verse"])
    local minVerse = cleanArg(args["MinVerse"])
    local maxVerse = cleanArg(args["MaxVerse"])

    if not verseFilter and not (minVerse and maxVerse) then
        table.insert(debugLog, "ℹ️ No verse filters applied; returning original data.")
        return data
    end

    -- Compare labels like "2a" <= "2b": number first, then the optional letter.
    -- Labels that are not "<digits><optional letter>" never compare as <=.
    local function labelLE(a, b)
        local anum, alet = a:match("^(%d+)(%a?)$")
        local bnum, blet = b:match("^(%d+)(%a?)$")
        anum, bnum = tonumber(anum), tonumber(bnum)
        if not anum or not bnum then return false end
        if anum ~= bnum then
            return anum < bnum
        end
        return (alet or "") <= (blet or "")
    end

    -- A purely numeric upper bound must include its sub-lines: "2" → "2z".
    -- (The lower bound needs no such adjustment: "2" already sorts before "2a".)
    if maxVerse then
        local num, let = maxVerse:match("^(%d+)(%a?)$")
        if num and let == "" then
            maxVerse = num .. "z"
        end
    end

    -- Parse the single-verse filter once instead of once per row.
    local filterNum, filterLet
    if verseFilter then
        filterNum, filterLet = verseFilter:match("^(%d+)(%a?)$")
    end

    local filtered = {}
    for _, item in ipairs(data) do
        local label = tostring(item.label)
        local keep = false
        if verseFilter then
            if filterLet ~= "" then
                -- lettered (or unparseable) filter → exact match only
                keep = (label == verseFilter)
            else
                -- pure number → match any sub-labels too
                local itemNum = tonumber(label:match("^(%d+)"))
                keep = (itemNum ~= nil and tonumber(filterNum) == itemNum)
            end
        else
            keep = labelLE(minVerse, label) and labelLE(label, maxVerse)
        end
        if keep then
            table.insert(filtered, item)
        end
    end

    table.insert(debugLog, string.format(
        "✅ Filtered content by .label. Original count: %d → Filtered: %d",
        #data, #filtered
    ))
    return filtered
end
-- Decode `input` as JSON when it is a string that parses successfully.
-- Non-strings, and strings that fail to decode, are returned unchanged.
local function parseMaybeJSON(input)
    if type(input) == "string" then
        local decodedOk, decoded = pcall(mw.text.jsonDecode, input)
        if decodedOk then
            return decoded
        end
    end
    return input
end
-- Load a wiki page and decode its content as JSON.
--
-- title:    page title; nil or blank is rejected.
-- warnings: optional sequence that receives one message per failure.
--
-- Returns the decoded table, or nil when the title is empty, the page does
-- not exist, it has no content, or the content is not JSON decoding to a table.
local function loadJSONFromPage(title, warnings)
    local sink = warnings or {}

    -- Record the reason and produce the nil result in one step.
    local function fail(message)
        sink[#sink + 1] = message
        return nil
    end

    if not title or mw.text.trim(title) == "" then
        return fail("⚠️ Empty title passed in.")
    end

    local page = mw.title.new(title)
    if not (page and page.exists) then
        return fail("⚠️ Page not found: " .. tostring(title))
    end

    local content = page:getContent()
    if not content then
        return fail("⚠️ No content on page: " .. title)
    end

    local decodedOk, parsed = pcall(mw.text.jsonDecode, content)
    if decodedOk and type(parsed) == "table" then
        return parsed
    end
    return fail("⚠️ Invalid JSON on page: " .. title)
end
-- Bucket tokens by the key that getKey(token) returns, keeping each bucket in
-- the original token order. A nil `tokens` yields an empty table.
local function groupTokensByVerse(tokens, getKey)
    local buckets = {}
    for _, token in ipairs(tokens or {}) do
        local key = getKey(token)
        local bucket = buckets[key]
        if not bucket then
            bucket = {}
            buckets[key] = bucket
        end
        bucket[#bucket + 1] = token
    end
    return buckets
end
-- Sort every group of English tokens in place by ascending `englishIndex`;
-- tokens without an index sort as 0. A nil argument is a no-op.
local function sortEnglishTokens(englishGroups)
    local function byEnglishIndex(left, right)
        local leftIndex = left.englishIndex or 0
        local rightIndex = right.englishIndex or 0
        return leftIndex < rightIndex
    end

    if not englishGroups then
        return
    end
    for _, tokens in pairs(englishGroups) do
        table.sort(tokens, byEnglishIndex)
    end
end
-- Read the invocation arguments, update the module-level settings
-- (quiet, chapter, separatePunctuation) and load every data page needed to
-- render one chapter.
--
-- Arguments read (all optional):
--   "Quiet"                      "true"/"yes" → failures return "" instead of a message
--   "1" / "chapter" / "Chapter"  chapter identifier (default "200")
--   "separatePunctuation"        "no"/"false" turns it off
--   "participants"               "true"/"yes" → also load the annotation page
--   "annotations"                annotation page title (default Data/<ch>/annotations/participants.json)
--   "hebrew", "english", "lineation"  preferred file names, without ".json"
--   "colormap"                   title of a JSON page with participant colours
--
-- Each data set is taken from the first usable page in a fixed fallback list.
-- Hebrew and English pages must decode to a non-empty table to be used; the
-- exception is the final Hebrew fallback (hebrew.map.json), which is accepted
-- even when empty. Lineation pages are accepted whenever they decode to a table.
--
-- Every page is fetched at most once per call: with default arguments the
-- preferred file and the first fallback are the same page, and repeating the
-- lookup only costs another existence check. Warnings from all failed
-- attempts are collected and appended to the failure message.
--
-- Returns on success:
--   hebrewMap, englishMap, lineationMap (nil if none found), annotations
--   ({} if none), colormap (nil if not loaded), emendMap (maculaID → {reason, cls})
-- Returns on failure to find Hebrew or English data: a single string — ""
-- when quiet, otherwise an error message followed by the collected warnings.
local function loadFiles(args)
    quiet = args["Quiet"] == "true" or args["Quiet"] == "yes"
    chapter = args["1"] or args["chapter"] or args["Chapter"] or "200"
    chapter = mw.text.trim(chapter)

    local sp = args["separatePunctuation"]
    sp = sp and mw.text.trim(mw.ustring.lower(sp)) or nil
    separatePunctuation = not (sp == "no" or sp == "false")

    -- Annotations are only loaded when participants are requested.
    local useAnnotations = args["participants"] == "true" or args["participants"] == "yes"
    local annotationFile = useAnnotations and (args["annotations"] or ("Data/" .. chapter .. "/annotations/participants.json")) or nil

    local warnings = {}

    -- Memoised loader: remembers the outcome (including failure) per title.
    local attempted, loadedPages = {}, {}
    local function load(path)
        if not attempted[path] then
            attempted[path] = true
            loadedPages[path] = loadJSONFromPage(path, warnings)
        end
        return loadedPages[path]
    end

    -- Return the first page in `names` (relative to `dir`) that decodes to a
    -- table — non-empty unless allowEmpty — together with the name that won.
    local function firstUsable(dir, names, allowEmpty)
        for _, name in ipairs(names) do
            local loaded = load(dir .. name)
            if type(loaded) == "table" and (allowEmpty or next(loaded) ~= nil) then
                return loaded, name
            end
        end
        return nil
    end

    local alignmentDir = "Data/" .. chapter .. "/alignment/"

    -- Hebrew -----------------------------------------------------------------
    local hebrewMap, hebrewName = firstUsable(alignmentDir, {
        (args["hebrew"] or "hebrew.v5") .. ".json",
        "hebrew.v5.json",
    }, false)
    if hebrewMap then
        if debug then table.insert(debugLog, "Hebrew file: " .. hebrewName) end
    else
        -- Last resort; accepted even if it decodes to an empty table.
        hebrewMap = load(alignmentDir .. "hebrew.map.json")
        if not hebrewMap then
            if quiet then
                return ""
            end
            return "❌ Hebrew map not found for chapter " .. tostring(chapter) .. "\n" .. table.concat(warnings, "\n")
        end
    end

    -- English ----------------------------------------------------------------
    local englishMap, englishName = firstUsable(alignmentDir, {
        (args["english"] or "english.v5") .. ".json",
        "english.v5.json",
        "english.v4.json",
        "english.map.json",
        "cbc.aligned.json",
    }, false)
    if not englishMap then
        if quiet then
            return ""
        end
        return "❌ English map not found for chapter " .. tostring(chapter).. "\n" .. table.concat(warnings, "\n")
    end
    if debug then table.insert(debugLog, "English file: " .. englishName) end

    -- Lineation (optional) -----------------------------------------------------
    local lineationDir = "Data/" .. chapter .. "/lineation/"
    local lineationMap, lineationName = firstUsable(lineationDir, {
        (args["lineation"] or "poetic-lines.v5") .. ".json",
        "poetic-lines.v2.json",
        "poetic-lines.json",
        "verses.json",
    }, true)
    if lineationMap and debug then
        table.insert(debugLog, "lineation file: " .. lineationDir .. lineationName)
    end

    -- Optional extras: a missing page simply yields nil / {}.
    local colormap = loadJSONFromPage(args["colormap"])
    local annotations = loadJSONFromPage(annotationFile) or {}
    local emendations = loadJSONFromPage("Data/" .. chapter .. "/macula/changes/hebrew.json") or {}

    -- Only these reasons are turned into CSS classes; the class name equals
    -- the reason. Entries with any other reason are ignored.
    local knownReasons = { emendation = true, revocalization = true, insertion = true }
    local emendMap = {}
    for _, e in ipairs(emendations) do
        if e.id and e.reason and knownReasons[e.reason] then
            emendMap[e.id] = { reason = e.reason, cls = e.reason }
        end
    end

    return hebrewMap, englishMap, lineationMap, annotations, colormap, emendMap
end
-- Split a whitespace-separated ID list into a sequence of its tokens.
-- nil is treated as the empty string; other values go through tostring().
local function splitHbIDs(s)
    local parts = {}
    local text = tostring(s or "")
    -- gsub is used purely as an iterator here; its result is discarded.
    string.gsub(text, "%S+", function(piece)
        parts[#parts + 1] = piece
    end)
    return parts
end
-- True when the whitespace-separated ID list `ids` contains exactly the tag
-- "line-<label>".
local function hasLineLabel(ids, label)
    for id in string.gmatch(tostring(ids or ""), "%S+") do
        if id == "line-" .. label then
            return true
        end
    end
    return false
end
-- True when any token of the whitespace-separated ID list starts with "line-".
local function hasAnyLineLabel(ids)
    for id in string.gmatch(tostring(ids or ""), "%S+") do
        if string.sub(id, 1, 5) == "line-" then
            return true
        end
    end
    return false
end
-- Decide whether an English token tagged "line-<tokenLabel>" belongs to the
-- row labelled `rowLabel`.
--
-- A token label matches its own row and that row's sub-lines: "14" matches
-- "14", "14a" and "14b". It must NOT match a different verse that merely
-- shares leading digits: "1" does not match "10" or "14a". A plain prefix
-- test gets that wrong, so a numeric token label is rejected when the row
-- label continues with another digit.
--
-- An empty token label (a bare "line-" tag) matches nothing; as a prefix it
-- would otherwise match every row.
--
-- Both arguments go through tostring(); nil is treated as "".
-- The comparison is byte-wise on both sides, so the prefix length and the
-- slice always agree, even for non-ASCII labels.
local function lineLabelMatches(rowLabel, tokenLabel)
    rowLabel = tostring(rowLabel or "")
    tokenLabel = tostring(tokenLabel or "")
    if tokenLabel == "" then
        return false
    end

    local prefixLen = #tokenLabel
    if string.sub(rowLabel, 1, prefixLen) ~= tokenLabel then
        return false
    end

    -- "1" vs "14a": the verse number continues, so this is another verse.
    local nextChar = string.sub(rowLabel, prefixLen + 1, prefixLen + 1)
    if string.find(tokenLabel, "%d$") and string.find(nextChar, "%d") then
        return false
    end
    return true
end
-- Return the value stored in `amap` for the first token of the
-- whitespace-separated list `ids` that has a truthy entry, or nil if none has.
local function colorForIDs(ids, amap)
    for id in string.gmatch(tostring(ids or ""), "%S+") do
        local color = amap[id]
        if color then
            return color
        end
    end
    return nil
end
-- Index the English tokens for alignment lookups.
--
-- For every token of `englishMap` one record is produced:
--   ref    the token itself
--   seq    its position in englishMap (original order)
--   minPos the smallest hbIndexMap position among the token's hbIDs, or nil
--          when none of them is known
--   labels the <label> part of every "line-<label>" tag on the token
--
-- Returns two tables: the sequence of records, and a map
-- label → sequence of the records carrying that label.
local function buildEnglishMeta(englishMap, hbIndexMap)
    local records, recordsByLabel = {}, {}

    for seq, token in ipairs(englishMap or {}) do
        local lineLabels = {}
        local earliest

        for piece in string.gmatch(tostring(token.hbID or ""), "%S+") do
            if string.sub(piece, 1, 5) == "line-" then
                -- keep only what follows the "line-" prefix
                lineLabels[#lineLabels + 1] = string.sub(piece, 6)
            else
                local canonical = canonHb(piece)
                local position = canonical and hbIndexMap[canonical] or nil
                if position and (not earliest or position < earliest) then
                    earliest = position
                end
            end
        end

        local record = { ref = token, seq = seq, minPos = earliest, labels = lineLabels }
        records[#records + 1] = record

        for _, lineLabel in ipairs(lineLabels) do
            local bucket = recordsByLabel[lineLabel]
            if not bucket then
                bucket = {}
                recordsByLabel[lineLabel] = bucket
            end
            bucket[#bucket + 1] = record
        end
    end

    return records, recordsByLabel
end
-- Choose the English tokens to display beside one Hebrew line.
--
-- englishMeta / englishByLabel: the two tables produced by buildEnglishMeta.
-- label:              the row label, as a string.
-- startIdx, endIdx:   inclusive Hebrew index window of the row.
-- nextStartIdx:       accepted for the caller's benefit; not used here.
--
-- Selection:
--   1. tokens tagged with a line label matching this row (see lineLabelMatches);
--   2. tokens whose earliest Hebrew position falls inside the window;
--   3. every token lying between the first and last token picked so far,
--      so the English text stays contiguous.
-- Tokens picked only by step 3 are marked on the token itself:
--   ref._class[label] = "cbc-out-of-order"  (aligned, but to Hebrew elsewhere)
--   ref._class[label] = "cbc-supplied"      (not aligned to any known Hebrew)
--
-- Returns the token tables ordered by englishIndex, then by original sequence.
local function selectEnglishForWindow(englishMeta, englishByLabel, label, startIdx, endIdx, nextStartIdx)
    local picks, taken = {}, {}
    local lowestSeq, highestSeq

    -- Record a token once; the first call for a token decides its `aligned` flag.
    local function take(record, isAligned)
        local seq = record.seq
        if taken[seq] then
            return
        end
        taken[seq] = true
        picks[#picks + 1] = { meta = record, aligned = isAligned }
        if not lowestSeq or seq < lowestSeq then lowestSeq = seq end
        if not highestSeq or seq > highestSeq then highestSeq = seq end
    end

    -- 1) explicit line labels; a parent label such as "14" also covers "14a", "14b"
    for tokenLabel, records in pairs(englishByLabel or {}) do
        if lineLabelMatches(label, tokenLabel) then
            for _, record in ipairs(records) do
                take(record, true)
            end
        end
    end

    -- 2) tokens aligned to Hebrew inside this row's window
    for _, record in ipairs(englishMeta or {}) do
        local pos = record.minPos
        if pos and pos >= startIdx and pos <= endIdx then
            take(record, true)
        end
    end

    -- 3) fill the gaps between the first and last aligned token; anything
    --    new at this point was, by construction, not aligned to this row
    if lowestSeq and highestSeq then
        for _, record in ipairs(englishMeta or {}) do
            if record.seq >= lowestSeq and record.seq <= highestSeq then
                take(record, false)
            end
        end
    end

    -- Order by englishIndex, breaking ties by original sequence
    local function sortIndex(pick)
        return tonumber(pick.meta.ref.englishIndex or 0) or 0
    end
    table.sort(picks, function(left, right)
        local leftIndex, rightIndex = sortIndex(left), sortIndex(right)
        if leftIndex ~= rightIndex then
            return leftIndex < rightIndex
        end
        return left.meta.seq < right.meta.seq
    end)

    -- Classify the gap-fillers and unwrap the token tables
    local result = {}
    for _, pick in ipairs(picks) do
        local ref = pick.meta.ref
        if not pick.aligned then
            ref._class = ref._class or {}
            ref._class[label] = pick.meta.minPos and "cbc-out-of-order" or "cbc-supplied"
        end
        result[#result + 1] = ref
    end
    return result
end
-- Render the aligned Hebrew / verse / English table as wikitext.
--
-- frame: invocation frame; reads frame.args "debug", "skipHeader",
--        "tableClass", "alignOnVerses", plus everything loadFiles and
--        applyVerseFilter read.
--
-- Rows are built in one of three ways:
--   * alignOnVerses = true/yes: one row per distinct token `.verse`;
--   * otherwise, if lineation data was loaded: one row per lineation entry,
--     with English chosen by alignment (see selectEnglishForWindow);
--   * otherwise: Hebrew and English grouped by `.verse`.
--
-- Returns the wikitext, prefixed by the debug log when debug is on.
-- Raises an error when the data cannot be loaded or is malformed; the public
-- wrapper p.buildAlignedHebrewEnglishTable catches it.
--
-- NOTE: intentionally left as a global function (no `local`) so that any
-- existing reference to the global name keeps working.
function internalBuildAlignedHebrewEnglishTable(frame)
    debug = frame.args["debug"] == "true" or frame.args["debug"] == "yes"
    local html = {}
    skipHeader = frame.args["skipHeader"] == "yes"
    tableClass = frame.args["tableClass"] or ""
    if tableClass == "" then
        tableClass = "wikitable psalm-table"
    end
    table.insert(html, "{| class='" .. tableClass .. "'")
    if not skipHeader then
        table.insert(html, "! Hebrew !! Verse !! English")
    end

    local hebrewMap, englishMap, lineationMap, annotations, colormap, emendMap = loadFiles(frame.args)

    -- loadFiles reports a failed load by returning a message string ("" when
    -- quiet) instead of tables. Raise it here with the real reason rather
    -- than letting it surface later as an unrelated "table expected" error.
    if type(hebrewMap) ~= "table" then
        error(hebrewMap ~= "" and hebrewMap or "❌ Psalm alignment data could not be loaded", 0)
    end

    local alignOnVerses = frame.args["alignOnVerses"] == "true" or frame.args["alignOnVerses"] == "yes"

    -- Basic counts
    dbg("📦 Loaded: hebrew=%s, english=%s, lineation=%s",
        type(hebrewMap), type(englishMap), type(lineationMap))
    dbg("📊 Counts: #hebrew=%d, #english=%d, #lineation=%d",
        #(hebrewMap or {}), #(englishMap or {}), #(lineationMap or {}))

    -- participant → colour, from the colormap page
    local participantColorMap = {}
    if type(colormap) == "table" then
        for _, entry in ipairs(colormap or {}) do
            if entry.participant and entry.color then
                participantColorMap[entry.participant] = entry.color
            end
        end
    end

    -- hbID → colour; the colormap wins over a colour stored on the annotation
    local annotationMap = {}
    if type(annotations) == "table" then
        for _, ann in ipairs(annotations or {}) do
            if ann.hbID and ann.participant then
                local color = participantColorMap[ann.participant] or ann.color
                if color then
                    annotationMap[ann.hbID] = color
                end
            end
        end
    end

    local rows = {}
    if alignOnVerses then
        -- Lineation is ignored: rows come straight from each token's own .verse
        local seen = {}
        for _, h in ipairs(hebrewMap or {}) do
            local v = tostring(h.verse or ""):gsub("%s+", "")
            if v ~= "" then
                if not seen[v] then
                    seen[v] = { hebrew = {}, english = {} }
                end
                table.insert(seen[v].hebrew, h)
            end
        end
        for _, e in ipairs(englishMap or {}) do
            local v = tostring(e.verse or ""):gsub("%s+", "")
            if v ~= "" then
                if not seen[v] then
                    seen[v] = { hebrew = {}, english = {} }
                end
                table.insert(seen[v].english, e)
            end
        end

        -- Sort verse keys (numeric, then suffix)
        local verseKeys = {}
        for v in pairs(seen) do verseKeys[#verseKeys + 1] = v end
        table.sort(verseKeys, function(a, b)
            local na, sa = parseVerseKey(a)
            local nb, sb = parseVerseKey(b)
            if na == nb then return sa < sb end
            return na < nb
        end)

        -- One row per verse; the label is the verse key itself
        for _, v in ipairs(verseKeys) do
            rows[#rows + 1] = {
                label = v,
                hebrew = seen[v].hebrew,
                english = seen[v].english
            }
        end
    elseif type(lineationMap) == "table" then
        -- hbID → position among all Hebrew tokens, stored under both the
        -- canonical "hb-…" form and the bare form
        local hbIndexMap = {}
        local nilHbCount = 0
        for i, token in ipairs(hebrewMap or {}) do
            local h = canonHb(token.hbID)
            if h then
                hbIndexMap[h] = i
                hbIndexMap[(h:gsub("^hb%-", ""))] = i
            else
                nilHbCount = nilHbCount + 1
            end
        end
        if nilHbCount > 0 then
            dbg("⚠️ Hebrew tokens missing hbID: %d", nilHbCount)
        end

        local englishMeta, englishByLabel = buildEnglishMeta(englishMap, hbIndexMap)

        -- For each lineation row, compute its Hebrew span [startIndex, endIndex]
        for i, row in ipairs(lineationMap) do
            local startHBID = canonHb(row.hbID)
            -- an unknown start ID falls back to the first token
            local startIndex = (startHBID and hbIndexMap[startHBID]) or 1
            local stopBefore = row.stopBefore or row["stop-before"]
            -- an explicit stop marker wins over the next row's start
            local nextHBID = canonHb(stopBefore) or canonHb((lineationMap[i + 1] or {}).hbID)
            local nextIdx = nextHBID and hbIndexMap[nextHBID] or nil

            -- if a later boundary exists, stop strictly before it;
            -- otherwise run to the end of the Hebrew text
            local endIndex
            if nextIdx and nextIdx > startIndex then
                endIndex = nextIdx - 1
            else
                endIndex = #hebrewMap
            end
            -- protect against reverse ordering (like jumping back)
            if endIndex < startIndex then
                endIndex = startIndex
            end

            local hebrewTokens = {}
            for j = startIndex, endIndex do hebrewTokens[#hebrewTokens + 1] = hebrewMap[j] end

            local englishTokens = selectEnglishForWindow(
                englishMeta, englishByLabel, tostring(row.label),
                startIndex, endIndex, nextIdx
            )
            rows[#rows + 1] = { label = row.label, hebrew = hebrewTokens, english = englishTokens }
        end
    else
        -- Fallback: no lineation data, group both languages by .verse
        local function cleanVerseKey(t)
            return tostring(t.verse):gsub("%s+", "")
        end
        local grouped = groupTokensByVerse(hebrewMap, cleanVerseKey)
        local englishGrouped = groupTokensByVerse(englishMap, cleanVerseKey)
        sortEnglishTokens(englishGrouped)

        local orderedLabels = {}
        for k in pairs(grouped) do table.insert(orderedLabels, k) end
        -- Sort numerically (1 < 2 < 10), then by suffix (1a < 1b). Uses the
        -- file-level parseVerseKey, which maps non-numeric keys to 0 instead
        -- of returning nil and breaking the comparison.
        table.sort(orderedLabels, function(a, b)
            local na, sa = parseVerseKey(a)
            local nb, sb = parseVerseKey(b)
            if na == nb then return sa < sb end
            return na < nb
        end)
        for _, label in ipairs(orderedLabels or {}) do
            table.insert(rows, {
                label = label,
                hebrew = grouped[label],
                english = englishGrouped[label] or {}
            })
        end
    end

    rows = applyVerseFilter(rows, frame.args)
    rows = rows or {}
    if type(rows) ~= "table" then
        error("❌ rows is not a table; got: " .. tostring(rows))
    end

    for _, row in ipairs(rows or {}) do
        -- selectEnglishForWindow stores classes under tostring(label); use the
        -- same key here so rows with numeric labels keep their classes.
        local classKey = tostring(row.label)

        local hebrewCell = {}
        -- Pieces of the current graphical word (tokens not separated by space)
        local currentWord = {}
        local function flushWord()
            if #currentWord > 0 then
                table.insert(hebrewCell,
                    "<span class='nowrap'>" .. table.concat(currentWord) .. "</span>"
                )
                currentWord = {}
            end
        end

        for _, h in ipairs(row.hebrew or {}) do
            local cls = h.hbID or ""
            local before = type(h.before) == "string" and h.before or ""
            local after = type(h.after) == "string" and h.after or ""
            local color = annotationMap[cls]
            local styleAttr = color and (" style='font-weight:bold; background-color:" .. color .. "'") or ""
            if h.maculaID and emendMap[h.maculaID] then
                local em = emendMap[h.maculaID]
                cls = cls .. " " .. em.cls
                -- tooltip with the reason
                styleAttr = styleAttr .. " title='" .. mw.text.nowiki(em.reason) .. "'"
            end

            -- separate trailing spaces from punctuation/quotes
            local afterCore, trailingSpaces = after:match("^(.-)(%s+)$")
            if not afterCore then
                afterCore, trailingSpaces = after, ""
            end

            -- the two layouts take the same arguments; only the position of
            -- the trailing punctuation (outside vs inside the span) differs
            local spanFormat = separatePunctuation
                and "%s<span class='hebrew %s'%s>%s</span>%s"
                or "%s<span class='hebrew %s'%s>%s%s</span>"
            table.insert(currentWord, string.format(
                spanFormat, before, cls, styleAttr, h.text or "", afterCore
            ))

            -- whitespace ends the graphical word: flush it, then emit the spaces
            if trailingSpaces ~= "" then
                flushWord()
                table.insert(hebrewCell, trailingSpaces)
            end
        end
        -- flush the last graphical word
        flushWord()

        local englishCell = {}
        for _, e in ipairs(row.english or {}) do
            local classes = { "english" }
            if e.hbID and e.hbID ~= "" then
                table.insert(classes, e.hbID)
            end
            if e._class and e._class[classKey] then
                table.insert(classes, e._class[classKey])
            end
            local cls = table.concat(classes, " ")
            local before = type(e.before) == "string" and e.before or ""
            local after = type(e.after) == "string" and e.after or ""
            local color = colorForIDs(cls, annotationMap)
            local styleAttr = color and (" style='font-weight:bold; background-color:" .. color .. "'") or ""

            local spanFormat = separatePunctuation
                and "%s<span class='english %s'%s>%s</span>%s"
                or "%s<span class='english %s'%s>%s%s</span>"
            table.insert(englishCell, string.format(
                spanFormat, before, cls, styleAttr, e.Word or "", after
            ))
        end

        table.insert(html, string.format("|-\n| class='hebrew-cell' | %s || class='verse-cell' | '''%s''' || class='cbc-cell' | %s",
            table.concat(hebrewCell, ""), row.label, table.concat(englishCell, "")))
    end
    table.insert(html, "|}")

    if debug then
        return table.concat(debugLog, "<br/>") .. "<br/>" .. table.concat(html, "\n")
    else
        return table.concat(html, "\n")
    end
end
-- Public entry point. Renders the table and never raises for a failed render:
--   * on success, returns the rendered wikitext;
--   * on failure, if frame.args.alternativeCode is non-empty, returns that
--     wikitext expanded with frame:preprocess();
--   * otherwise (or if expanding the alternative fails too) returns the debug
--     log when debug is on, "" when quiet, or a short "not yet available" text.
function p.buildAlignedHebrewEnglishTable(frame)
    local built, outcome = pcall(function()
        return internalBuildAlignedHebrewEnglishTable(frame)
    end)
    if built then
        return outcome
    end

    -- Shared tail of both failure paths: debug log, silence, or visible text.
    local function report(logLine, visibleText)
        if debug then
            table.insert(debugLog, logLine)
            return table.concat(debugLog, "\n")
        end
        if quiet then
            return ""
        end
        return visibleText
    end

    local fallbackCode = frame.args and frame.args.alternativeCode
    if fallbackCode and fallbackCode ~= "" then
        local expanded, altOutcome = pcall(function()
            return frame:preprocess(fallbackCode)
        end)
        if expanded then
            return altOutcome
        end
        -- even the alternative could not be expanded
        return report(
            "❌ Error in alternativeCode: " .. tostring(altOutcome),
            "Psalm data not yet available. (alt error: " .. tostring(altOutcome) .. ")"
        )
    end

    -- no alternativeCode given
    return report("❌ Error: " .. tostring(outcome), "Psalm data not yet available.")
end
return p