HTML to LaTeX RE pattern and sub

[EMAIL PROTECTED] Fri, 23 Sep 2005 13:40:48 -0700

Hi,

trying to make a small script which would translate wiki file into
LaTeX and when trying to translate possible HTML elements into LaTeX I
did this:


def latexEnvironments(matchobj):
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.group(1)
    translDict = {'dl': ('\\begin{description}','\\end{description}'),
        'ol': ('\\begin{enumerate}','\\end{enumerate}'),
        'ul': ('\\begin{itemize}','\\end{itemize}'),
        'blockquote': ('\\begin{quote}','\\end{quote}'),
        'center': ('\\begin{center}','\\end{center}'),
        'li': ('\\item',''),
        'pre': ('\\begin{ttfamily}','\\end{ttfamily}')}
    if translDict.has_key(inStr):
        retTuple = translDict[inStr]
    else:
        retTuple = ('','')
    if matchobj.group(0) == '/':
        return retTuple[1]
    else:
        return retTuple[0]

def latexHeadings(matchobj):
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.groups[1]
    translDict = {'h1': '\\section{}',
        'h2': '\\subsection{}',
        'h3': '\\subsubsection{}',
        'h4': '\\subsubsubsection{}',
        'h5': '\\paragraph{}'}
    if translDict.has_key(inStr) and (matchobj.groups[0] != '/'):
        return translDict[inStr]
    else:
        return ''

def latexEmptyElements(matchobj):
    retString = ""
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.group(0)
    translDict = {'br//': '\\\\',
        'hr': '\\par{}\\hrulefill{}\\par{}'}
    if translDict.has_key(inStr):
        return translDict[inStr]
    else:
        return ''



[... snip ...]

# Remove superfluous HTML elements
reEnvironments =
re.compile(r'<(/?)(dl|ol|ul|address|blockquote|center|del\
|ins|div|isindex|noscript|p|pre)>',re.IGNORE)
reHeadings = re.compile(r'<(/?)(h1|h2|h3|h4|h5|h6)>',re.IGNORE)
reEmpty = re.compile(r'<(hr|br)\s*/?>',re.IGNORE)
body = reEnvironments.sub(latexEnvironments,body)
body = reHeadings.sub(latexHeadings,body)
body = reEmpty.sub(latexEmptyElements,body)

The problem is that apparently RE never matches (and no function is
thus called). Can anybody tell me what's wrong with my REs, please?

Thanks for any help,

Matej

-- 
http://mail.python.org/mailman/listinfo/python-list

HTML to LaTeX RE pattern and sub

Reply via email to