improve auto-nbsp detection algorithm

This commit is contained in:
2025-10-21 22:17:48 +02:00
parent bdbda1189f
commit 378bd509c9
2 changed files with 33 additions and 22 deletions

View File

@@ -70,7 +70,19 @@
"next_page_attachment": "začíná na další straně",
"attached_bellow": "dále přiloženo",
"place_assignment": "Sem vložte zadání"
"place_assignment": "Sem vložte zadání",
"break_rules": {
"space_after": [
"((?i)[kosuvzai])",
"(tj|tzv|tzn)\\."
],
"nonbreaking_terms": [
"(s\\. r\\. o|a\\. s|v\\. o\\. s)\\.",
"č\\. ([pe]|ev)\\.",
"ev?\\. č\\."
]
}
},
"en": {

View File

@@ -46,30 +46,29 @@
}
// Wraps Czech terms that should stay together (short one-letter words and
// abbreviations followed by a space, multi-part abbreviations) in `box`,
// then yields `content`.
// NOTE(review): this span looks like a diff hunk whose +/- markers and
// indentation were stripped, so lines of the previous and of the new
// implementation appear interleaved below. Confirm against the repository
// before treating it as a single function body.
#let set_czech_nonbreakable_terms(content) = {
// Presumably the previous version: regex fragments hard-coded in the function.
let space_after = (
"[kosuvzai]",
"(tj|tzv|tzn)\.",
// Presumably the new version: the same kind of fragments are read from the
// "break_rules" item of the "cs" language via `get_lang_item` (defined
// outside this view).
let rules = get_lang_item("cs", "break_rules");
let space_after = rules.at("space_after");
let nonbreaking_terms = rules.at("nonbreaking_terms");
// `terms`: alternation of all non-breaking terms, anchored at a word boundary.
let terms = "\b(" + nonbreaking_terms.join("|") + ")";
// `chain`: one or more `space_after` alternatives, each followed by a single
// space, ending in either one of `terms` or a plain word (`\w+\b`).
let chain = (
"\b((" + space_after.join("|") + ") )+" +
"(" + terms + "|\w+\b)"
);
// Presumably the previous version: a case-insensitive show rule boxing one or
// more `space_after` alternatives (each followed by a space) plus the next word.
show regex("\b((?i)(" + space_after.join("|") + ") )+\w+\b"): match => {
box(match);
}
// Presumably the previous version: abbreviations written with unescaped dots
// and without their final dot.
let nonbreaking_abbreviations = (
"a. s",
"s. r. o",
"v. o. s",
"k. s",
"n. p",
"p. o",
"č. ([pe]|ev)",
"ev?. č",
);
// The dots are escaped with `replace`, the entries joined with "|", and the
// trailing `\.` appended here; matching is case-insensitive.
show regex(
"(?i)\b(" + nonbreaking_abbreviations.map((v) => { v.replace(".", "\\.") }).join("|") + ")\."
): match => {
box(match);
// Presumably the new version: helper that, for every pattern in `exprs`,
// re-wraps the accumulated result in a block whose show rule boxes the regex
// matches, and finally returns the wrapped result. `exprs` defaults to a
// one-element array holding an empty string.
let apply_rules(exprs: ("",), content) = {
let res = content;
for expr in exprs {
res = {
show regex(expr): box;
res;
};
}
res
}
// Both patterns are applied to headings and to paragraphs through show rules.
show heading: apply_rules.with(exprs: (chain, terms));
show par: apply_rules.with(exprs: (chain, terms));
// The (styled) content is the value of the function body.
content
}