[sphinx-users] Mechanism for extracting translated text from docutils/nodes

'Hoang Tran' via sphinx-users Sat, 23 Nov 2019 17:06:38 -0800

Hi, 
This is my first time posting on this forum, so please be gentle with me. 
Currently I'm working as a translator for Blender project, translating 
reference manual to Vietnamese. I have completed the UI and done some 
chapters on the translation set, which can be used as a database for 
untranslated text. I'm only interested in outputting to PO files. My current 
solution is to:

1. Write an extension that makes use of 'app.connect('doctree-resolved',
doctree_resolved)'.
2. Within 'doctree_resolved', I use a for loop with
extract_messages(doctree) to traverse and extract all messages to be
translated.
3. I use 'node.walk' with a 'visitor' (extends nodes.TreeCopyVisitor) to
traverse all children nodes of the current node.
4. In the visitor, I use the 'default_visit' to traverse the children
(recursively) to the end instance of nodes.Text where I use 'astext()' to
extract the English text, then use a lookup routine to find appropriate
'translation' for the text.
5. Some texts are REQUIRED to have the original English appended to them
(at the end), as a way to leave breadcrumbs for Vietnamese readers who would
like to refer back to the original English HTML text. These items often come
from 'inline', 'title', 'rubric', etc., so I use a flag to identify when the
translated text (if it exists) must include the original (I call this
combination the translation text).
6. I use Message and Catalog to store English (msgid) and translation
(msgstr) and write them out to a separate directory before merging them
with existing translations by diff.

My questions:

1. How should I store translation text? Currently I'm forced to insert a
variable in the 'docutils.Node' to store the translation (for testing).
2. How can I use the same 'extract_messages(doctree)' mechanism to extract
English and translation at the same time? Currently everything is tied to
the overloaded 'astext()' method, and that seems to go to __repr__(self).
How should I approach this problem?

Best regards,
Hoang Tran

All the code is attached.

--
You received this message because you are subscribed to the Google Groups
"sphinx-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/sphinx-users/7e65b85e-c7dd-4104-8cec-3732c1de9b69%40googlegroups.com.

import os
import io
import re
import json
from common import Common as cm
from pobase import POBasic
import docutils
from docutils import nodes
from sphinx import addnodes, roles
from pprint import pprint as PP
from six import text_type
from sphinx.util.nodes import extract_messages, traverse_translatable_index
from distutils import log as logger
from sphinx.locale import _, __
from sphinx_intl import catalog as c
from babel.messages import pofile
from babel.messages.catalog import Message, Catalog
from pprint import pprint as pp
from googletrans import Translator as GTR

#from Levenshtein import distance as DS
from markupsafe import Markup
# Resolve an HTML-entity ``unescape`` callable that works on every
# interpreter this extension may be loaded by.
try:
    import html
except ImportError:
    html = None

unescape = getattr(html, 'unescape', None)
if unescape is None:
    # html.unescape is unavailable (Python < 3.4, or no ``html`` module at
    # all on Python 2).  HTMLParser.unescape is deprecated since Python 3.4
    # and removed in 3.9, so it is only used as a last resort.
    # The parser class has to be imported here: the previous code referenced
    # an undefined ``html_parser`` name and raised NameError on this path.
    try:
        from html.parser import HTMLParser
    except ImportError:
        from HTMLParser import HTMLParser  # Python 2 module name
    unescape = HTMLParser().unescape


# sudo apt install python-pip python3-pip
# pip3 install googletrans
from googletrans import Translator

# Module-level switch; it is not read anywhere in the visible code.
# NOTE(review): presumably meant to gate a googletrans fallback - confirm.
use_google_translate = False

# type: <class 'docutils.nodes.caption'>
# type: <class 'docutils.nodes.emphasis'>
# type: <class 'docutils.nodes.field_name'>
# type: <class 'docutils.nodes.image'>
# type: <class 'docutils.nodes.inline'>
# type: <class 'docutils.nodes.line'>
# type: <class 'docutils.nodes.literal'>
# type: <class 'docutils.nodes.math'>
# type: <class 'docutils.nodes.math_block'>
# type: <class 'docutils.nodes.paragraph'>
# type: <class 'docutils.nodes.reference'>
# type: <class 'docutils.nodes.rubric'>
# type: <class 'docutils.nodes.strong'>
# type: <class 'docutils.nodes.superscript'>
# type: <class 'docutils.nodes.term'>
# type: <class 'docutils.nodes.Text'>
# type: <class 'docutils.nodes.title'>

def isIgnored(msg):
    """Tell whether *msg* must be left untranslated.

    All four ``cm`` filters are always evaluated.  The return value is the
    first truthy filter result, or the result of the last filter when none
    is truthy.  Every filter that returned exactly ``True`` is printed
    together with the message for debugging.
    """
    checks = (
        ("is_ignore_word", cm.isIgnoredWord(msg)),
        ("is_dos_command", cm.isDosCommand(msg)),
        ("is_ignore_start", cm.isIgnoredIfStartsWith(msg)),
        ("is_ignore_path", cm.isFilePath(msg)),
    )

    # Emulate ``a or b or c or d``: stop at the first truthy result.
    verdict = None
    for _, verdict in checks:
        if verdict:
            break

    if verdict:
        for label, flag in checks:
            # Only genuine booleans are reported, not other truthy values.
            if flag is True:
                print(label, flag, msg)
    return verdict


class TranslationFinder:
    def __init__(self):
        """Load every translation source used for look-ups.

        All locations are hard-coded absolute paths, and construction reads
        the JSON dictionary and the PO files from disk.
        """
        # Hard-coded input locations.
        self.dic_path = "/home/htran/blender_documentations/new_po/vi.po"
        self.current_po_dir = "/home/htran/blender_documentations/blender_docs/locale/vi/LC_MESSAGES"
        self.json_dic_file = "/home/htran/Documents/menuselection_new_dictionary_sorted_translated_0028.json"

        # JSON dictionary (entries without a translation removed) and a copy
        # keyed by lower-cased source text.
        self.json_dic_list = self.loadJSONDic()
        self.json_dic_list_lower = self.makeJSONLowerCaseDic(self.json_dic_list)
        # PO catalogue at ``dic_path`` and a dict keyed by lower-cased msgid.
        self.dic_cat = c.load_po(self.dic_path)
        self.dic_cat_lower = self.poCatDictLower(self.dic_cat)
        # Placeholders, immediately replaced by the loaded PO dictionaries.
        self.translated_po_dic = None
        self.translated_po_dic_lower = None
        self.translated_po_dic, self.translated_po_dic_lower = self.loadTranslatedPO()
        # googletrans client; not used by any method in the visible code.
        self.tr = Translator()

        #po_dic = self.poCatToList(dic_cat)

        #po_dic_lower = self.poCatToListLower(dic_cat)
        #sorted_po_dic = sorted(po_dic)
        #PP(po_dic_lower)
        #sorted_lower_po_dic = sorted(po_dic_lower)

    def removeJSONDicNoTranslation(self, dic):
        """Delete, in place, every entry of *dic* whose value has length zero.

        The keys are collected first so the dict is not resized while it is
        being iterated.  The very same dict object is returned.
        """
        untranslated = [key for key, value in dic.items() if not len(value) > 0]
        for key in untranslated:
            del dic[key]
        return dic

    def makeJSONLowerCaseDic(self, dic):
        """Return a new dict with every key of *dic* lower-cased.

        Values are kept untouched.  When two keys collapse to the same
        lower-case form, the one iterated last wins.
        """
        return {key.lower(): value for key, value in dic.items()}

    def loadJSONDic(self, file_name=None):
        dic = None
        try:
            file_path = (self.json_dic_file if (file_name == None) else file_name)
            with open(file_path) as in_file:
                dic = json.load(in_file)
                if dic:
                    print("Loaded:{}".format(len(dic)))
                else:
                    raise Exception("dic [{}] is EMPTY. Not expected!", file_path)
        except Exception as e:
            print("Exception readDictionary Length of read dictionary:")
            print(e)
            raise e

        dic = self.removeJSONDicNoTranslation(dic)
        print("after cleaned:{}".format(len(dic)))
        return dic


    def POContentToDic(self, po_cat, dict, dict_lowercase):
        """Copy the translated messages of *po_cat* into the two given dicts.

        A message is skipped when its ``string`` is empty or identical to its
        ``id``.  Every other message object is stored in *dict* under its id
        and in *dict_lowercase* under the lower-cased id.  Both dicts are
        modified in place; nothing is returned.
        """
        for entry in po_cat:
            source, translated = entry.id, entry.string
            if not (translated and (len(translated) > 0)):
                continue
            if source == translated:
                continue
            dict[source] = entry
            dict_lowercase[source.lower()] = entry


    def loadTranslatedPO(self):
        """Collect the translated messages of every PO file in ``current_po_dir``.

        The file list comes from ``POBasic.getSortedPOFileList``; empty path
        entries are skipped.  Returns a pair ``(by_id, by_lowercase_id)`` of
        dicts filled by ``POContentToDic``.
        """
        exact, lowered = {}, {}
        po_files = POBasic(self.current_po_dir, False).getSortedPOFileList()
        for po_path in po_files:
            if len(po_path) <= 0:
                continue
            self.POContentToDic(c.load_po(po_path), exact, lowered)
        return exact, lowered

    def poCatToList(self, po_cat):
        """Return ``[(message.id, message), ...]`` for every message in *po_cat*."""
        return [(entry.id, entry) for entry in po_cat]

    def poCatToListLower(self, po_cat):
        """Return ``[(message.id.lower(), message), ...]`` for every message in *po_cat*."""
        return [(entry.id.lower(), entry) for entry in po_cat]

    def poCatDictLower(self, po_cat):
        """Return a dict mapping each lower-cased message id to its message.

        When two ids differ only by case, the message iterated last wins.
        """
        return {entry.id.lower(): entry for entry in po_cat}

    # PP(sorted_lower_po_dic)

    def dump_po(self, filename, catalog):
        """Write *catalog* to *filename* as a PO file, creating the directory.

        :param filename: destination path of the PO file.
        :param catalog: babel catalogue passed on to ``pofile.write_po``.
        """
        dirname = os.path.dirname(filename)
        # A bare file name has an empty dirname, and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

        # Because babel automatically encode strings, file should be open as binary mode.
        with io.open(filename, 'wb') as f:
            pofile.write_po(f, catalog, width=4096)


    def removeOriginal(self, msg, trans):
        """Strip the English original *msg* out of the translation *trans*.

        Nothing is changed unless *msg* occurs in *trans* as a whole word
        (case-insensitive).  When it does, three forms are removed, each
        optionally carrying a common English suffix (s, es, ies, ed, ing,
        lly): ``-- msg``, ``msg --`` and a bare ``msg``.

        :param msg: original (English) text.
        :param trans: translation text that may embed the original.
        :return: the cleaned, stripped translation, or ``None`` when nothing
            is left after the removal.
        """
        escaped = re.escape(msg)
        if re.search(r'\b{}\b'.format(escaped), trans, flags=re.I) is None:
            return trans

        # The suffix is part of one pattern so "-- Opening" is removed as a
        # whole; trying the empty suffix first used to leave "ing" behind.
        # The lookarounds act as word boundaries that also work when msg
        # starts or ends with a non-word character.
        word = r'{}(?:ies|ing|lly|es|ed|s)?(?!\w)'.format(escaped)
        patterns = (
            r'-- {}'.format(word),
            r'(?<!\w){} --'.format(word),
            # Was r'\\b..\\b', which matched a literal backslash-b and so
            # never removed a bare occurrence of the original.
            r'(?<!\w){}'.format(word),
        )
        for pattern in patterns:
            trans = re.sub(pattern, "", trans, flags=re.I)

        trans = trans.strip()
        return trans if trans else None

    def isInList(self, msg, find_list, is_lower=False):
        """Look *msg* up in *find_list* and return a cleaned translation.

        :param msg: source text to look up.
        :param find_list: mapping-like container supporting ``in`` and
            ``[]``; values may be plain strings or ``Message`` objects.
        :param is_lower: lower-case *msg* before the look-up.
        :return: the translation with the original removed and passed
            through ``cm.matchCase`` against the original spelling, or
            ``None`` when no usable translation exists.
        """
        orig_msg = str(msg)
        if is_lower:
            msg = msg.lower()

        trans = find_list[msg] if msg in find_list else None
        if isinstance(trans, Message):
            trans = trans.string

        # An empty value or the literal text 'None' counts as "no translation".
        if not (trans and (len(trans) > 0) and (trans != 'None')):
            return None

        trans = self.removeOriginal(msg, trans.strip())
        return cm.matchCase(orig_msg, trans)

    def findTranslationByFragment(self, msg):
        """Translate *msg* piecewise, replacing each fragment that has a translation.

        Fragments are the matches of ``cm.WORD_ONLY_FIND`` in the unescaped
        message (presumably single words - confirm against ``cm``).
        Ignored fragments and fragments without a translation are left
        as they are.

        :param msg: source text, possibly containing HTML entities.
        :return: the message with translated fragments substituted, or
            ``None`` when *msg* is ignored or nothing could be replaced.
        """
        if isIgnored(msg):
            return None

        msg = unescape(msg)

        replacements = []
        for w_start, w_end, w_match_0, w_match_1 in cm.patternMatchAsList(cm.WORD_ONLY_FIND, msg):
            if isIgnored(w_match_0):
                continue
            # Look up through this instance rather than the module-level
            # ``trans_finder`` singleton, so any instance stays self-contained.
            trans_word = self.findTranslation(w_match_0)
            if trans_word:
                replacements.append((w_start, w_match_0, trans_word))

        trans = str(msg)
        # Substitute from the last match backwards so the start offsets of
        # the earlier matches remain valid.
        for w_start, w_match_0, trans_word in reversed(replacements):
            w_end = w_start + len(w_match_0)
            trans = trans[:w_start] + trans_word + trans[w_end:]

        if trans == msg:
            return None
        return trans

    def findTranslation(self, msg):
        """Return the first translation found for *msg*, or ``None``.

        The sources are tried in a fixed order: the main PO catalogue, the
        translated PO files and the JSON dictionary, each first with the
        exact text and then lower-cased.  The hit is stripped, has the
        original removed, and is printed together with its source.
        """
        lookup_order = (
            ("dic_cat", False),
            ("dic_cat_lower", True),
            ("translated_po_dic", False),
            ("translated_po_dic_lower", True),
            ("json_dic_list", False),
            ("json_dic_list_lower", True),
        )

        list_name = ""
        trans = None
        for attr, lowered in lookup_order:
            list_name = "self." + attr
            trans = self.isInList(msg, getattr(self, attr), is_lower=lowered)
            if trans:
                break
        else:
            # No source produced a translation.
            return None

        if not (trans and (len(trans) > 0) and (trans != 'None')):
            return None

        trans = self.removeOriginal(msg, trans.strip())
        print("findTranslation in {} [{}] => [{}]".format(list_name, msg, trans))
        return trans


# Shared look-up object used by the visitor methods below.  Building it at
# import time runs TranslationFinder.__init__, which reads the dictionaries.
trans_finder = TranslationFinder()

# String constants.  Only MNU_SEP and TR_ED are referenced in the visible
# code; the others are presumably kept for node-attribute look-ups - confirm.
MNU_SEL = 'menuselection'
MNU_SEP = '-->'  # separator between the items of a :menuselection: role
CLASS = 'classes'
RAWTXT = 'rawtext'
DOC = 'doc' #:doc:`keyframe
STD_REF="std std-ref" #:ref:`easings
X_REF="xref std std-term" #:term:`walk cycle
BACKQUOTE="`"
TR_ED='translated'  # node key set by TranslationNodeVisitor.setNodeTranslated
def print_separator(output_path):
    """Print the document's output path followed by a 30-dash rule."""
    print("docname: {}".format(output_path))
    print("-" * 30)

def print_result_list(result):
    """Pretty-print *result* with ``pprint``."""
    PP(result)

# type: <class 'docutils.nodes.caption'>
# type: <class 'docutils.nodes.emphasis'>
# type: <class 'docutils.nodes.field_name'>
# type: <class 'docutils.nodes.image'>
# type: <class 'docutils.nodes.inline'>
# type: <class 'docutils.nodes.line'>
# type: <class 'docutils.nodes.literal'>
# type: <class 'docutils.nodes.math'>
# type: <class 'docutils.nodes.math_block'>
# type: <class 'docutils.nodes.paragraph'>
# type: <class 'docutils.nodes.reference'>
# type: <class 'docutils.nodes.rubric'>
# type: <class 'docutils.nodes.strong'>
# type: <class 'docutils.nodes.superscript'>
# type: <class 'docutils.nodes.term'>
# type: <class 'docutils.nodes.Text'>
# type: <class 'docutils.nodes.title'>

class TranslationNodeVisitor(nodes.TreeCopyVisitor):
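    """Visitor that looks up a translation for every ``nodes.Text`` it reaches.

    ``default_visit`` stores the result on the text node as
    ``node.translation``; the ``trans_*`` helpers handle ``:kbd:``,
    ``:menuselection:`` and reference markup found in the message text.
    """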
    def setVars(self, node, msg, trans):
        """Reset the per-message state before a node is walked.

        :param node: node whose message is about to be processed.
        :param msg: source text of that node.
        :param trans: known translation; when falsy, *msg* is stored as the
            current translation instead.
        """
        self.current_node = node
        self.current_msg = msg
        self.current_trans = trans if trans else msg
        self.fuzzy_list, self.ref_trans = [], []
        self.is_fuzzy = self.is_title_node = self.keep_original = False
        self.tr = None

    def trans_keyboard(self, msg):
        """Translate the key names inside the ``:kbd:`` roles of *msg*.

        :param msg: source text.
        :return: ``None`` when *msg* has no ``:kbd:`` role or is ignored;
            otherwise the text with every key definition that
            ``cm.translateKeyboardDef`` could translate substituted (the
            text comes back unchanged when none could).
        """
        if ":kbd:" not in msg:
            return None

        if isIgnored(msg):
            return None

        translated = str(msg)
        for w_start, w_end, w_match_0, w_match_1 in cm.patternMatchAsList(cm.TEXT_BETWEEN_REFS, msg):
            if not w_match_1:
                continue
            key_trans = cm.translateKeyboardDef(w_match_1)
            if key_trans:
                # Literal replacement: key names such as "Ctrl-+" or
                # "Shift-." hold regex metacharacters, so using the match as
                # a re.sub pattern could fail or replace the wrong text.
                translated = translated.replace(w_match_1, key_trans)
        return translated

    def setNodeTranslated(self, node):
        """Mark *node* as translated by storing ``True`` under the ``TR_ED`` key."""
        node[TR_ED] = True
        print("NODE TRANSLATED!!!")

    def getNodeTranslated(self, node):
        """Return the value stored under ``TR_ED`` on *node*.

        ``False`` is returned when reading or printing the value raises.
        """
        try:
            flag = node[TR_ED]
            print("NODE TRANSLATION:{}".format(flag))
        except Exception:
            return False
        return flag

    def connectMenuItems(self, list_of_menu_items):
        """Translate each menu item and keep the original in parentheses.

        :param list_of_menu_items: iterable of menu item strings.
        :return: list of ``"translation (item)"`` strings, or ``"(item)"``
            for items that have no translation.
        """
        labelled = []
        for raw_item in list_of_menu_items:
            item = raw_item.strip()
            print("item:", item)
            found = trans_finder.findTranslation(item)
            label = "{} ({})".format(found, item) if found else "({})".format(item)
            print("trans:", label)
            labelled.append(label)
        return labelled

    # /home/htran/blender_documentations/blender_docs/build/rstdoc/addons/3d_view/3d_navigation.html
    def trans_menuselection(self, msg):
        """Translate the first usable ``:menuselection:`` role of *msg*.

        The role text is split on ``MNU_SEP``, every item is translated by
        ``connectMenuItems`` and the result is spliced back into *msg*.

        :return: the rewritten message, or ``None`` when *msg* has no such
            role, is ignored, or no match could be used.
        """
        print("trans_menuselection", msg)
        if ":menuselection:" not in msg:
            return None

        if isIgnored(msg):
            return None

        for w_start, w_end, w_match_0, w_match_1 in cm.patternMatchAsList(cm.TEXT_BETWEEN_REFS, msg):
            print("w_start, w_end, w_match_0, w_match_1")
            print(w_start, w_end, w_match_0, w_match_1)

            if isIgnored(w_match_1) or not w_match_1:
                continue

            menu_items = self.connectMenuItems(w_match_1.split(MNU_SEP))
            joined = " {} ".format(MNU_SEP).join(menu_items)
            # Keep one character after w_start and one before w_end
            # (presumably the enclosing backquotes - confirm against cm).
            result = msg[:w_start+1] + joined + msg[w_end-1:]
            print("FINAL {}".format(result))
            return result
        return None

    def trans_ref(self, msg):
        """Translate the text of the reference roles found in *msg*.

        Every match of ``cm.TEXT_BETWEEN_REFS`` that has a translation is
        replaced by ``"translation -- original"``.

        :return: the rewritten message, or ``None`` when *msg* holds no
            reference (per ``cm.REF_KEYS``), is ignored, or none of the
            matches has a translation.
        """
        print("trans_ref", msg)
        if cm.REF_KEYS.search(msg) is None:
            return None

        if isIgnored(msg):
            return None

        replacements = []
        for w_start, w_end, w_match_0, w_match_1 in cm.patternMatchAsList(cm.TEXT_BETWEEN_REFS, msg):
            print("w_start, w_end, w_match_0, w_match_1")
            print(w_start, w_end, w_match_0, w_match_1)

            if isIgnored(w_match_1):
                continue

            found = trans_finder.findTranslation(w_match_1)
            if found:
                # Collected instead of applied at once: a later match
                # without a translation used to reset the result to None.
                replacements.append((w_start, w_end, "{} -- {}".format(found, w_match_1)))

        if not replacements:
            return None

        result = msg
        # Apply from the rightmost match backwards so the offsets of the
        # remaining matches stay valid and every reference gets replaced.
        for w_start, w_end, text in sorted(replacements, key=lambda r: r[0], reverse=True):
            result = result[:w_start+1] + text + result[w_end-1:]
        return result

    def printNode(self, node, extra=None):
        """Dump debugging information about *node* to stdout.

        Prints the optional *extra* label, the node type, its children, a
        pretty-printed form, and - when the attributes exist - its stripped
        text, ``rawsource`` and ``line``.
        """
        print('-'*50)
        if extra:
            print(extra)

        print("type:", type(node))
        if hasattr(node, 'children'):
            print("children:", node.children)

        print("pp node:")
        pp(node)

        if hasattr(node, 'astext'):
            print("node text: [{}]".format(node.astext().strip()))

        if hasattr(node, 'rawsource'):
            print("rawsource: [{}]".format(node.rawsource))

        if hasattr(node, 'line'):
            print("line", node.line)

        print("")
        # print("dir:", dir(node))
        # print("len:", len(node))


    def translate_text(self, msg, include_origin_in_translation=False, bracketting_origin=False):
        """Find a translation for *msg*, optionally appending the original.

        A direct dictionary hit is used when it differs from *msg*.
        Otherwise the keyboard, menu-selection, reference and word-by-word
        translators are tried in that order.

        :param include_origin_in_translation: append the original text to
            the result (or return the marked original when nothing is found).
        :param bracketting_origin: wrap the appended original in parentheses.
        :return: the translation text, or ``None``.
        """
        if isIgnored(msg):
            return None

        trans = trans_finder.findTranslation(msg)
        if trans and trans != msg:
            print('has translation:', trans)
        else:
            # First truthy result of the fallback chain, else None.
            trans = (self.trans_keyboard(msg)
                     or self.trans_menuselection(msg)
                     or self.trans_ref(msg)
                     or trans_finder.findTranslationByFragment(msg)
                     or None)

        if include_origin_in_translation:
            if trans:
                trans = ("{} ({})" if bracketting_origin else "{} -- {}").format(trans, msg)
            else:
                trans = ("-- ({})" if bracketting_origin else "-- {}").format(msg)

        print("trans: [{}]".format(trans))
        print('-'*30)
        return trans

    def translate(self, node, include_origin_in_translation=False, bracketting_origin=False):
        """Translate the text of *node*; see ``translate_text`` for the flags."""
        print("translate node:")
        return self.translate_text(
            node.astext(),
            include_origin_in_translation=include_origin_in_translation,
            bracketting_origin=bracketting_origin,
        )


    def default_visit(self, node):
        """Translate a Text node, or recurse into the children of any other node.

        For emphasis, title, term, rubric, field_name, reference, strong and
        inline nodes the ``keep_original`` flag is switched on.  The next
        Text node that is processed then gets the English original appended
        to its translation, and the flag is switched off again.  The result
        is stored on the Text node as ``node.translation``.

        NOTE(review): ``doctree_resolved`` drives this through ``node.walk``,
        which presumably visits descendants as well, so together with the
        manual recursion below the same Text node may be handled more than
        once - confirm.
        """
        print("-"*50)
        # class 'docutils.nodes.emphasis' ##>
        # class 'docutils.nodes.inline' ##>
        # class 'docutils.nodes.literal' ##>
        # class 'docutils.nodes.reference' ##>
        # class 'docutils.nodes.strong' ##>

        print(type(node))
        is_inline = isinstance(node, nodes.inline)
        is_emphasis = isinstance(node, nodes.emphasis)
        is_title = isinstance(node, nodes.title)
        is_term = isinstance(node, nodes.term)
        is_rubric = isinstance(node, nodes.rubric)
        is_field_name = isinstance(node, nodes.field_name)
        is_reference = isinstance(node, nodes.reference)
        is_strong = isinstance(node, nodes.strong)

        # Node types whose text must keep the English original next to it.
        if (is_emphasis or is_title or is_term or is_rubric or is_field_name or is_reference or is_strong or is_inline):
            l = {   "is_emphasis":is_emphasis,
                    "is_title":is_title,
                    "is_term":is_term,
                    "is_rubric":is_rubric,
                    "is_field_name":is_field_name,
                    "is_reference":is_reference,
                    "is_strong":is_strong,
                    "is_inline":is_inline }

            # Debug output: which of the node types matched.
            for k, v in l.items():
                if v:
                    msg = node.astext()
                    print(msg, "=>" ,k)

            print("TURN ON KEEP ORIGINAL")
            self.keep_original = True

        if isinstance(node, nodes.Text):
            #self.printNode(node, extra="default_visit, TEXT")
            msg = node.astext()
            trans = self.translate_text(msg)
            #this needs to be inserted manually in the file docutils.nodes, class Text

            if trans:
                if self.keep_original:
                    trans = "{} -- {}".format(trans, msg)
                print("MSG:", msg, "=> TRANSLATION:", trans)
            else:
                # No translation: with keep_original the marked original
                # is stored, otherwise trans stays None.
                if self.keep_original:
                    trans = "-- {}".format(msg)
                print("NO TRANSLATION:", "MSG:", msg, "TRANS", trans)
            node.translation = trans
            # The flag applies to one Text node only.
            if self.keep_original:
                print("TURN OFF KEEP ORIGINAL")
                self.keep_original = False
        else:
            has_children = hasattr(node, 'children')
            if has_children:
                # Manual depth-first descent through the child nodes.
                for child in node.children:
                    self.default_visit(child)
            else:
                self.printNode(node, extra="default_visit, NOTHING")
            print("-" * 80)
            #raise nodes.SkipNode

            # class 'docutils.nodes.emphasis' ##>
            # class 'docutils.nodes.image'>
            # class 'docutils.nodes.inline' ##>
            # class 'docutils.nodes.literal' ##>
            # class 'docutils.nodes.math'>
            # class 'docutils.nodes.reference' ##>
            # class 'docutils.nodes.strong' ##>
            # class 'docutils.nodes.Text'>

            #elif isinstance(nodes.)
        # type: (nodes.Node) -> None

        # this is where no visit has been implemented????
        #pass

    # visit_emphasis used to be defined three times and visit_strong twice;
    # Python keeps only the last definition, so one of each is kept here.
    def visit_emphasis(self, node):
        """Log the visit of an emphasis node; nothing is translated here."""
        print("visit_emphasis")

    def visit_strong(self, node):
        """Log the visit of a strong node; nothing is translated here."""
        print("visit_strong")
        #self.printNode(node)

    # def visit_bullet_list(self, node):
    #     # type: (nodes.Node) -> None
    #     pass

    def visit_reference(self, node):
        """Log the visit of a reference node; nothing is translated here."""
        print("visit_reference")

    def visit_inline(self, node):
        """Log the visit of an inline node; nothing is translated here."""
        print("visit_inline")
        # for index, n in enumerate(node):
        #     self.printNode(n)
        #     is_xref = ('xref' in node['classes'])
        #     is_term = ('term' in node['classes'])
        #     is_menu = ('menuselection' in node['classes'])
        #     is_keyboard = ('kbd' in node['classes'])
        #     is_doc = ('doc' in node['classes'])
        #
        #     if is_xref:
        #         print("EXREF")
        #     elif is_term:
        #         print("TERM")
        #     elif is_menu:
        #         print("MENU")
        #     elif is_keyboard:
        #         print("KEYBOARD")
        #     elif is_doc:
        #         print("DOC")
        #
        #     has_rawtext = False
        #     raw_text = None
        #     has_uri = False
        #     uri = None
        #     try:
        #         raw_text = node['rawtext']
        #         has_rawtext = (raw_text != None)
        #     except Exception as e:
        #         print(e)
        #
        #     try:
        #         uri = node['refuri']
        #         has_uri = (uri != None)
        #     except Exception as e:
        #         print(e)
        #
        #     if has_rawtext:
        #         print("raw_text:", raw_text)
        #         trans = self.translate_text(raw_text, include_origin_in_translation=True)
        #     elif is_doc:
        #         print("refuri:", uri)
        #         trans = self.translate_text(n, include_origin_in_translation=True)
        #         # if trans:
        #         #     lead = ':doc:`'
        #         #     msg = n.astext()
        #         #     find_text="{}{}".format(lead, msg)
        #         #     replace_text = "{}{} -- {} ".format(lead, trans, msg)
        #         #     self.current_msg = re.sub(find_text, replace_text, self.current_msg)
        #         #     print(find_text, replace_text, "self.current_msg", self.current_msg)
        #     else:
        #         print("NOT RAW")
        #         trans = self.translate(n, include_origin_in_translation=True)

    def visit_literal(self, node):
        """Log the visit of a literal node; nothing is translated here."""
        print("visit_literal")
        # self.printNode(node)
        # for index, n in enumerate(node):
        #     self.printNode(n)
        #     msg = node.astext()
        #     packed_ref = cm.PACKED_REF.search(msg)
        #     has_packed_ref = (packed_ref != None)
        #     if has_packed_ref:
        #         print("has_packed_ref:[{}]".format(packed_ref))
        #         rs, re, rm1, rm2 = cm.patternMatchAsParts(cm.PACKED_REF, msg)
        #         trans = self.translate_text(rm1)
        #     else:
        #         trans = self.translate_text(msg)

    def invisible_visit(self, node):
        # type: (nodes.Node) -> None
        """Deliberately do nothing: invisible nodes are ignored."""
# --------------------------------------------------------------

# Module-level googletrans client; it is not referenced anywhere in the
# visible code.
gtr = GTR() #Google translator

def dump_po(filename, catalog):
    """Write *catalog* to *filename* as a PO file, creating the directory.

    :param filename: destination path of the PO file.
    :param catalog: babel catalogue passed on to ``pofile.write_po``.
    """
    dirname = os.path.dirname(filename)
    # A bare file name has an empty dirname, and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    # Because babel automatically encode strings, file should be open as binary mode.
    with io.open(filename, 'wb') as f:
        pofile.write_po(f, catalog, width=4096)

def isInRefList(reflist, entry):
    """Tell whether another item of *reflist* covers the same span as *entry*.

    Items are ``(start, end, original, extract)`` tuples.  An item counts
    when it is not identical to *entry* yet has the same start, end and
    extract.

    :param reflist: list of reference tuples; may be empty or ``None``.
    :param entry: the tuple to look for; may be empty or ``None``.
    :return: ``True`` when such an item exists, otherwise ``False``.
    """
    if not reflist or not entry:
        return False

    es, ee, eorig, eextract = entry
    for s, e, orig, extract in reflist:
        same_span = (s == es) and (e == ee) and (extract == eextract)
        # All three fields must match on the SAME item.  Testing them
        # against three separate lists let unrelated items combine into a
        # false positive.  An item identical to *entry* is entry itself.
        if same_span and not (orig == eorig):
            return True
    return False


def doctree_resolved(app, doctree, docname):
    """Handle Sphinx's ``doctree-resolved`` event for one document.

    Every translatable node reported by ``extract_messages`` is walked with
    a ``TranslationNodeVisitor``.  The visitor is created once, from the
    document of the first node.

    :param app: the Sphinx application (unused).
    :param doctree: resolved doctree of the document.
    :param docname: document name, used to build the printed ``.po`` path.
    """

    def replaceRefTrans(msg, ref_trans_list):
        """Replace each original in *msg* with its translation.

        Not called yet in this function; kept for the planned reference
        substitution step.
        """
        print("replaceRefTrans:")
        print("msg:[{}]".format(msg))
        print("ref_trans_list:")
        pp(ref_trans_list)

        orig_msg = str(msg)
        for w_start, w_end, ref, orig, trans in reversed(ref_trans_list):
            msg = msg.replace(orig, trans)
            print("trans_text:[{}]".format(trans))
            print("new_msg:[{}]".format(msg))

        print(orig_msg, " |==>| ", msg)
        return msg

    build_dir = "build/rstdoc"

    # <parent of this file's directory>/build/rstdoc/<docname>.po
    po_file_path = "{}.po".format(docname)
    local_path = os.path.dirname(os.path.abspath(__file__))
    blender_docs_path = os.path.dirname(local_path)
    output_path = os.path.join(blender_docs_path, build_dir, po_file_path)

    print_separator(output_path)

    # Created lazily: the visitor needs the document of an actual node.
    visitor = None
    for node, msg in extract_messages(doctree):
        msg = unescape(msg).strip()

        if visitor is None:
            visitor = TranslationNodeVisitor(node.document)

        visitor.setVars(node, msg, None)
        try:
            node.walk(visitor)
        except Exception as e:
            print(e)
            raise  # bare raise keeps the original traceback

    print_separator(output_path)

def setup(app):
    """Sphinx extension entry point: hook ``doctree_resolved`` into the build.

    Returns the extension metadata dict (version and parallel-safety flags).
    """
    # Other events tried earlier: 'builder-inited', 'source-read'.
    app.connect('doctree-resolved', doctree_resolved)

    extension_info = {
        'version': '0.1',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return extension_info

[sphinx-users] Mechanism for extracting translated text from docutils/nodes

Reply via email to