improve auto-nbsp detection algorithm
This commit is contained in:
@@ -70,7 +70,19 @@
|
||||
"next_page_attachment": "začíná na další straně",
|
||||
"attached_bellow": "dále přiloženo",
|
||||
|
||||
"place_assignment": "Sem vložte zadání"
|
||||
"place_assignment": "Sem vložte zadání",
|
||||
|
||||
"break_rules": {
|
||||
"space_after": [
|
||||
"((?i)[kosuvzai])",
|
||||
"(tj|tzv|tzn)\\."
|
||||
],
|
||||
"nonbreaking_terms": [
|
||||
"(s\\. r\\. o|a\\. s|v\\. o\\. s)\\.",
|
||||
"č\\. ([pe]|ev)\\.",
|
||||
"ev?\\. č\\."
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"en": {
|
||||
|
||||
@@ -46,30 +46,29 @@
|
||||
}
|
||||
|
||||
#let set_czech_nonbreakable_terms(content) = {
|
||||
let space_after = (
|
||||
"[kosuvzai]",
|
||||
"(tj|tzv|tzn)\.",
|
||||
let rules = get_lang_item("cs", "break_rules");
|
||||
let space_after = rules.at("space_after");
|
||||
let nonbreaking_terms = rules.at("nonbreaking_terms");
|
||||
|
||||
let terms = "\b(" + nonbreaking_terms.join("|") + ")";
|
||||
let chain = (
|
||||
"\b((" + space_after.join("|") + ") )+" +
|
||||
"(" + terms + "|\w+\b)"
|
||||
);
|
||||
show regex("\b((?i)(" + space_after.join("|") + ") )+\w+\b"): match => {
|
||||
box(match);
|
||||
}
|
||||
|
||||
let nonbreaking_abbreviations = (
|
||||
"a. s",
|
||||
"s. r. o",
|
||||
"v. o. s",
|
||||
"k. s",
|
||||
"n. p",
|
||||
"p. o",
|
||||
"č. ([pe]|ev)",
|
||||
"ev?. č",
|
||||
);
|
||||
show regex(
|
||||
"(?i)\b(" + nonbreaking_abbreviations.map((v) => { v.replace(".", "\\.") }).join("|") + ")\."
|
||||
): match => {
|
||||
box(match);
|
||||
|
||||
let apply_rules(exprs: ("",), content) = {
|
||||
let res = content;
|
||||
for expr in exprs {
|
||||
res = {
|
||||
show regex(expr): box;
|
||||
res;
|
||||
};
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
show heading: apply_rules.with(exprs: (chain, terms));
|
||||
show par: apply_rules.with(exprs: (chain, terms));
|
||||
content
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user