Git commit 33f3c459ac20f66403e6e8180ed6a1cb346bdcc0 by Johnny Jazeix, on behalf of Albéric Poinsard. Committed on 25/10/2024 at 16:29. Pushed by jjazeix into branch 'master'.
[fr] Add a new sieve for french BFW translators Adding a useful sieve to make several automated correction. Designed to be used on french BFW po files, but can be used on other places. M +76 -0 doc/user/sieving.docbook M +1 -0 lang/fr/sieve/CMakeLists.txt A +240 -0 lang/fr/sieve/wesnoth.py https://invent.kde.org/sdk/pology/-/commit/33f3c459ac20f66403e6e8180ed6a1cb346bdcc0 diff --git a/doc/user/sieving.docbook b/doc/user/sieving.docbook index 8ffdeb9ee..1c6d407de 100644 --- a/doc/user/sieving.docbook +++ b/doc/user/sieving.docbook @@ -2583,6 +2583,82 @@ Note that percent characters in the <literal>plural-forms</literal> field are es </sect2> +<sect2 id="sv-fr:wesnoth"> +<title><command>fr:wesnoth</command></title> + +<para>A set of rules and corrections that can be automatically applied to the French translations of the Battle For Wesnoth.</para> + +<para>There are three levels of correction: +<itemizedlist> + +<listitem> +<para>level 1: basic correction</para> +<para>Use unbreakable space before « » ; : ! ? % and remove double space.</para> +</listitem> + + +<listitem> +<para>level 2: advanced correction (may break something)</para> +<para>Replace ' by typographic apostrophe ’, and paid attention to pango/cairo balise and escaped <literal>\\'</literal>, like in <literal><italic>text='...'</italic></literal> +It may need to be run several times to work correctly.</para> +</listitem> + +<listitem> +<para>level 3: very unstable correction (surely break something)</para> +<para>Remove space before . and ,</para> +</listitem> + +</itemizedlist> + +You can also use special filters, which make one specific task. + +</para> + +<para>Parameters: + +<variablelist> + +<varlistentry> +<term><option>quiet</option></term> +<listitem> +<para>Don't show the summary at the end of the sieve. Useful for script usage.</para> +</listitem> +</varlistentry> + +<varlistentry> +<term><option>level</option></term> +<listitem> +<para>Set the level(s) of correction (1, 2 or 3). To use multiple levels, just put them together like <literal>level:12</literal> for levels 1 and 2</para> +</listitem> +</varlistentry> + +<varlistentry> +<term><option>extra_spaces</option></term> +<listitem> +<para>(Special filter) Replace extra spaces at start and end of a message by punctuation space. You must give the number of the message where you want to replace extra spaces, in a comma-separated list. For example, <literal>extra_spaces:12,45,789</literal> to replace in the messages 12, 45 and 789.</para> +</listitem> +</varlistentry> + +<varlistentry> +<term><option>ellipsis3points</option></term> +<listitem> +<para>(Special filter) Replace all Unicode ellipsis (…) by three dots (...).</para> +</listitem> +</varlistentry> + +<varlistentry> +<term><option>ellipsisUnicode</option></term> +<listitem> +<para>(Special filter) Replace all three dots (...) by Unicode ellipsis (…). May break something.</para> +</listitem> +</varlistentry> + +</variablelist> + +</para> + +</sect2> + <sect2 id="sv-ru:fill-doc-date-kde"> <title><command>ru:fill-doc-date-kde</command></title> diff --git a/lang/fr/sieve/CMakeLists.txt b/lang/fr/sieve/CMakeLists.txt index 9d4bb3c83..a55594b3a 100644 --- a/lang/fr/sieve/CMakeLists.txt +++ b/lang/fr/sieve/CMakeLists.txt @@ -1,6 +1,7 @@ set(sieves setUbsp.py setApostrophe.py + wesnoth.py ) get_current_source_subdir(srcsubdir) install(FILES ${sieves} DESTINATION ${DATA_INSTALL_DIR}/${srcsubdir}) diff --git a/lang/fr/sieve/wesnoth.py b/lang/fr/sieve/wesnoth.py new file mode 100644 index 000000000..d89f6b90e --- /dev/null +++ b/lang/fr/sieve/wesnoth.py @@ -0,0 +1,240 @@ +# -*- coding: UTF-8 -*- + +""" +Auto correct translation according to the rules etablished by the French +BFW traduction team. +It includes unbreakable spaces, hyphen (…), apostrophe, extra spaces (option). + +inspired from other sieves +@author: alberic89 <alberi...@gmx.com> +@license: GPLv3""" + +import re + +from typing import List +from pology import _, n_ +from pology.report import report +from pology.sieve import add_param_filter + + +def setup_sieve(p): + p.set_desc( + _( + "@info sieve description", + "Correct message according to BFW french standard", + ) + ) + + p.add_param( + "quiet", + bool, + defval=False, + desc=_( + "@info sieve parameter description", + "Do not show summary (for script usage)", + ), + ) + + p.add_param( + "level", + str, + defval="", + desc=_( + "@info sieve parameter description", + "Set level of correction (1, 2 and 3). You can use multiple levels, for example level:12", + ), + ) + + p.add_param( + "extra_spaces", + list, + defval=[], + desc=_( + "@info sieve parameter description", + "Replace all extra spaces by punctuation space on the message of the numero given. You can specify multiple messages with comma-separated list.", + ), + ) + + p.add_param( + "ellipsis3points", + bool, + defval=False, + desc=_( + "@info sieve parameter description", + "Replace all Unicode ellipsis (…) by three dots (...)", + ), + ) + + p.add_param( + "ellipsisUnicode", + bool, + defval=False, + desc=_( + "@info sieve parameter description", + "Replace all three dots (...) by Unicode ellipsis (…)", + ), + ) + + +class SpecialFilter: + """A special filter""" + + def __init__(self, name, value, condition, action): + """value is a boolean to know if this filter must be used. + action should be a function with the msg object. + condition a function which takes msg in argument and returns boolean (if the filter is conditional)""" + self.name = name + self.value = value + self.condition = condition + self.action = action + + def process(self, msg): + if self.condition(msg): + for i in range(len(msg.msgstr)): + msg.msgstr[i] = self.action(msg.msgstr[i]) + return msg + + def __eq__(self, y): + return y == self.value + + def __repr__(self): + return f"{self.name} : {self.value} : {self.action}" + + +def _replace_group(match, group, replacement): + if groupn := match.group(group): + return match.group().replace(groupn, replacement) + else: + return match.group() + + +class Sieve(object): + """Correct translation according to BFW French standard""" + + # apostrophe typographique "’" : \u2019 + # espace insécable " " : \u00A0 + # espace insécable fine " " : \u202F + + def __init__(self, params): + self.nmatch = 0 + self.p = params + self.level = params.level + nums = [0] + for _ in params.extra_spaces: + if _.isdigit(): + nums.append(nums.pop() * 10 + int(_)) + else: + nums.append(0) + + self.spaces = nums + self.space_start = re.compile(r"^ +") + self.space_end = re.compile(r" +$") + regex_replacements_1 = ( + (re.compile(r"(?<=\d)(\s+)(?=%(?=$| |\.|,))"), "\u00A0"), # % + (re.compile(r"\b(\s+)(?=:|»)"), "\u00A0"), # : » + (re.compile(r"(?<=«)(\s+)\b"), "\u00A0"), # « + (re.compile(r"\b(\s+)(?=;|!|\?)"), "\u202F"), # ; ! ? + (re.compile(r"\b( )\b"), " "), # double space + ) + regex_replacements_2 = ( + ( + re.compile(r"(?<==')([^\\']*(\b\\'\b))*([^\\']*)(?=')"), + lambda m: _replace_group(m, 2, "\u2019"), + ), + (re.compile(r"\b(')(?=$|\b|\s[:;!?]|[.,])"), "\u2019"), # ' + ) + + regex_replacements_3 = ( + (re.compile(r"\b( )(?=\.|,)"), ""), # remove space before point and virgule + ) + + self.regex_replacements = { + "1": regex_replacements_1, + "2": regex_replacements_2, + "3": regex_replacements_3, + } + + replacements_1 = () + replacements_2 = () + replacements_3 = () + + self.replacements = { + "1": replacements_1, + "2": replacements_2, + "3": replacements_3, + } + + self.filters = ( + SpecialFilter( + "extra_spaces", + params.extra_spaces, + lambda msg: msg.refentry in self.spaces, + self.replace_extra_spaces, + ), + SpecialFilter( + "ellipsis3points", + params.ellipsis3points, + lambda _: True, + lambda text: text.replace("\u2026", "..."), + ), + SpecialFilter( + "ellipsisUnicode", + params.ellipsisUnicode, + lambda _: True, + lambda text: text.replace("...", "\u2026"), + ), + ) # in future, add other specials filters + self.used_filters = [_ for _ in self.filters if _.value] + + def process(self, msg, cat): + oldcount = msg.modcount + + for nb in self.level: + for i in range(len(msg.msgstr)): + msg.msgstr[i] = self.correctTypo( + msg.msgstr[i], + self.replacements[nb], + self.regex_replacements[nb], + ) + + for _ in self.used_filters: + if _.value: + msg = _.process(msg) + + if oldcount < msg.modcount: + self.nmatch += 1 + + def finalize(self): + if self.nmatch > 0 and not self.p.quiet: + report( + n_( + "@info", + "There was %(num)d corrected message.", + "There were %(num)d corrected messages.", + num=self.nmatch, + ) + ) + + def correctTypo(self, text, replacements, regex_replacements): + """Set correct typo""" + + for _ in replacements: + text = text.replace(_[0], _[1]) + for _ in regex_replacements: + text = _[0].sub(_[1], text) + + return text + + def replace_extra_spaces(self, text): + """Replace space at start and end by punctuation space""" + # punctuation space " " : \u2008 + match_start = re.search(self.space_start, text) + match_end = re.search(self.space_end, text) + + if match_start: + text = re.sub(self.space_start, "\u2008" * len(match_start[0]), text) + + if match_end: + text = re.sub(self.space_end, "\u2008" * len(match_end[0]), text) + + return text