Hello everyone,
I am currently trying to build a minimal example showing how to process
TEI-XML inside ConTeXt (see here for explanations :
https://wiki.contextgarden.net/TEI_xml).
To do this I created two small test files:
1. cicero-sample-tei.xml (a very small TEI fragment), which is :
% cicero-sample-tei.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Simplified example without the TEI namespace -->
<TEI>
<text>
<body>
<div type="edition" xml:lang="la">
<head>Exemple d'édition critique (Cicero)</head>
<p>
<persName ref="#cicero">Marcus Tullius Cicero</persName> in
<placeName ref="#roma">Roma</placeName>
<app>
<lem wit="#A">virtutem magnam</lem>
<rdg wit="#B">virtutem magni</rdg>
</app>
laudavit.
</p>
<note xml:lang="fr">
L'apparat signale une variation sur l'adjectif qualifiant
<code>virtus</code>.
</note>
</div>
<div type="translation" xml:lang="fr">
<head>Traduction française (exemple)</head>
<p>
Marcus Tullius Cicéron, à Rome, loua la vertu.
</p>
</div>
</body>
</text>
</TEI>
2. cicero-tei-demo.tex (a ConTeXt file using Lua to parse the XML),
which is:
% cicero-tei-demo.tex
\setuppapersize[A5]
\setupbodyfont[latin-modern]
\starttext
\startluacode
local lom = require("lxp.lom")
-- Utility functions

--- Read a whole file into a string.
-- Raises an error (via assert) when the file cannot be opened.
-- @tparam string name path of the file to read
-- @treturn string the complete file contents
local function readfile(name)
    local handle = assert(io.open(name, "r"))
    local contents = handle:read("*all")
    handle:close()
    return contents
end
--- Return the first element child of `node` whose tag equals `tag`.
-- String children (text nodes) are skipped.
-- @tparam table node element whose array part holds its children
-- @tparam string tag element name to look for
-- @treturn table|nil the first matching child, or nil when none matches
local function find_child(node, tag)
    for _, candidate in ipairs(node) do
        local is_element = type(candidate) == "table"
        if is_element and candidate.tag == tag then
            return candidate
        end
    end
    return nil
end
--- Collect every element child of `node` whose tag equals `tag`.
-- String children (text nodes) are skipped; document order is preserved.
-- @tparam table node element whose array part holds its children
-- @tparam string tag element name to look for
-- @treturn table sequence of matching children (empty when none match)
local function find_children(node, tag)
    local matches = {}
    for _, candidate in ipairs(node) do
        local is_element = type(candidate) == "table"
        if is_element and candidate.tag == tag then
            matches[#matches + 1] = candidate
        end
    end
    return matches
end
--- Return the first <div> child of `body` whose `type` attribute is `dtype`.
-- @tparam table body element whose array part holds its children
-- @tparam string dtype value expected in the div's `type` attribute
-- @treturn table|nil the matching div, or nil when none matches
local function find_div_by_type(body, dtype)
    for _, candidate in ipairs(body) do
        if type(candidate) == "table" and candidate.tag == "div" then
            local attributes = candidate.attr
            if attributes and attributes.type == dtype then
                return candidate
            end
        end
    end
    return nil
end
--- Return the textual content of an element, descending into nested elements.
-- The previous version kept only the first string child of each nested
-- element, so text after an inline child (or nested more than one level
-- deep) was silently dropped. Pieces are joined with a single space, as before.
-- @tparam table node element whose array part holds strings and child elements
-- @treturn string the concatenated text ("" when the element holds no text)
local function text_content(node)
    local parts = {}
    for _, item in ipairs(node) do
        if type(item) == "string" then
            parts[#parts + 1] = item
        elseif type(item) == "table" then
            local inner = text_content(item)
            -- Skip text-less elements so they do not add stray separators.
            if inner ~= "" then
                parts[#parts + 1] = inner
            end
        end
    end
    return table.concat(parts, " ")
end
-- Simple indexes (collections of names).
-- Each variable needs its own table: `local persons, places = {}` would
-- leave `places` nil and break every later use of it.
local persons, places = {}, {}
--- Append `s` to the sequence `tbl` unless it was already added.
-- Nil/false and empty strings are ignored. Membership is tracked in the
-- hash field `tbl._seen`, which is created on first use.
-- @tparam table tbl target sequence (modified in place)
-- @tparam string|nil s value to add
local function add_unique(tbl, s)
    if s and s ~= "" then
        local seen = tbl._seen
        if not seen then
            seen = {}
            tbl._seen = seen
        end
        if not seen[s] then
            seen[s] = true
            tbl[#tbl + 1] = s
        end
    end
end
-- Reading the TEI file
local xml_data = readfile("cicero-sample-tei.xml")
local parsed = assert(lom.parse(xml_data))
-- lom.parse returns the root element itself, so <TEI> is `parsed`;
-- `parsed[1]` would be its first child (here a whitespace text node).
local TEI = parsed
assert(TEI.tag == "TEI", "root element of the XML file is not <TEI>")
-- Fail early with a readable message when a mandatory node is missing.
local text = assert(find_child(TEI, "text"), "<TEI> has no <text> child")
local body = assert(find_child(text, "body"), "<text> has no <body> child")
local edition_div = assert(find_div_by_type(body, "edition"),
    "<body> has no <div type=\"edition\">")
local head_edition = find_child(edition_div, "head")
local p_edition = find_child(edition_div, "p")
local extra_note = find_child(edition_div, "note")
-- The translation is optional: every lookup below tolerates its absence.
local trans_div = find_div_by_type(body, "translation")
local head_trans = trans_div and find_child(trans_div, "head")
local p_trans = trans_div and find_child(trans_div, "p")
-- Editorial processing of the Latin paragraph

--- Typeset the mixed content of an edition paragraph.
-- Text nodes are passed to ConTeXt unchanged. <persName> and <placeName> are
-- recorded in the `persons` / `places` indexes and typeset in small caps /
-- emphasis. For <app>, the <lem> reading is typeset in the running text and
-- the <rdg> variants go to a footnote. <note> becomes a footnote. Any other
-- element is reduced to its text content.
-- @tparam table pnode the <p> element (array part = strings and child elements)
local function process_edition_paragraph(pnode)
    for _, item in ipairs(pnode) do
        if type(item) == "string" then
            context(item)
        elseif type(item) == "table" then
            local tag = item.tag
            if tag == "persName" then
                local n = text_content(item)
                add_unique(persons, n)
                -- \sc is a font switch, not a one-argument command: it must be
                -- grouped, otherwise the rest of the paragraph is affected too.
                context("{\\sc " .. n .. "} ")
            elseif tag == "placeName" then
                local n = text_content(item)
                add_unique(places, n)
                -- Same for \em: keep the switch inside its own group.
                context("{\\em " .. n .. "} ")
            elseif tag == "app" then
                local lem = find_child(item, "lem")
                local rdgs = find_children(item, "rdg")
                -- An <app> without <lem> has no main reading to print.
                if lem then
                    context(text_content(lem))
                end
                if #rdgs > 0 then
                    local note_parts = {}
                    for _, r in ipairs(rdgs) do
                        local wit = r.attr and r.attr.wit or ""
                        local txt = text_content(r)
                        if wit ~= "" then
                            note_parts[#note_parts + 1] = wit .. ": " .. txt
                        else
                            note_parts[#note_parts + 1] = txt
                        end
                    end
                    context.footnote(table.concat(note_parts, "; "))
                end
            elseif tag == "note" then
                context.footnote(text_content(item))
            else
                context(text_content(item))
            end
        end
    end
end
-- Document composition
if head_edition then
    context.chapter(head_edition[1])
end
if p_edition then
    process_edition_paragraph(p_edition)
end
if extra_note then
    -- Emit the note before the paragraph ends, so the footnote mark is
    -- attached to the Latin text instead of starting an empty paragraph.
    -- Use the full text content: extra_note[1] stops at the first child
    -- element (<code>) and would truncate the note.
    context.footnote(text_content(extra_note))
end
context.par()
if head_trans and p_trans then
    context.blank()
    context.subject(head_trans[1])
    context(p_trans[1])
    context.par()
end
-- Mini index
context.blank()
context.section("Personnes mentionnées")
for _, p in ipairs(persons) do
    context.par()
    context(p)
end
context.blank()
context.section("Lieux mentionnés")
for _, l in ipairs(places) do
    context.par()
    context(l)
end
\stopluacode
\stoptext
The goal is simply to read the XML file, extract a few nodes, and print
them in ConTeXt — nothing advanced, just a proof of concept to
understand the workflow.
However, when compiling the TeX file with LMTX, ConTeXt aborts
immediately with the following error:
|lua error: module 'lxp.lom' not found: no field package.preload['lxp.lom']
no file '.../lua/lxp/lom.lua'... |
So the problem appears before any XML processing takes place: LMTX
cannot load |lxp.lom|.
I understand that LMTX uses a sandboxed Lua environment, so maybe |lxp|
is not part of the allowed modules anymore.
My question is therefore quite simple:
Is there a recommended way (or example) to parse XML/TEI within LMTX
today, given that |lxp.lom| is unavailable? Should one use the built-in
ConTeXt XML tools instead (|lxml|, |xml.filter|, etc.)?
Any advice or pointer to an up-to-date example would be very helpful.
Thank you very much,
JP
___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the
Wiki!
maillist : [email protected] /
https://mailman.ntg.nl/mailman3/lists/ntg-context.ntg.nl
webpage : https://www.pragma-ade.nl / https://context.aanhet.net (mirror)
archive : https://github.com/contextgarden/context
wiki : https://wiki.contextgarden.net
___________________________________________________________________________________