This is what I have so far. It is not really a priority of mine.
Implementing tables may be beyond what I was planning to do.


--

You received this message because you are subscribed to the Google Groups 
"web2py-users" group.
To post to this group, send email to web...@googlegroups.com.
To unsubscribe from this group, send email to 
web2py+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/web2py?hl=en.


import re
import cgi
import sanitizer

class Markdown:
    """Small regex-driven converter from a Markdown subset to HTML.

    ``convert`` applies each ``(regex, handler)`` pair of ``self.actions`` in
    order.  Every match is rendered by its handler, stored, and replaced in
    the text by a placeholder key (``pre + index + post``).  The remaining
    text is then passed through ``sanitize`` and the placeholders are expanded
    back into the stored HTML.  Handler output is therefore NOT sanitized
    afterwards, so each handler must escape or sanitize what it interpolates.
    """

    def __init__(self):
        # Placeholder keys look like "filu12fila".
        self.pre = 'filu'
        self.post = 'fila'
        self.regex_cleanup = re.compile(r'\s*\n(\s*(\n|\r))?', re.MULTILINE)
        # Built from pre/post so that literal input which happens to look like
        # a placeholder is shielded instead of being expanded by mistake.
        placeholder = r'(?P<s>%s\d+%s)' % (re.escape(self.pre),
                                          re.escape(self.post))
        # Order matters: earlier rules hide their matches from later ones.
        self.actions = [
            (re.compile(placeholder), self.t_ignore),
            (re.compile(r'\n(?P<s>(    .*?\n)+)', re.MULTILINE), self.t_code2),
            (re.compile(r'`\s*(?P<s>.+?)\s*`', re.MULTILINE), self.t_code),
            # A rule must fill its whole line; otherwise "**a** *b*" would
            # match on the "** *" in the middle.
            (re.compile(r'^[ \t]*([*\-][ \t]*){3,}$', re.MULTILINE), self.t_hr),
            (re.compile(r'\*\*\s*(?P<s>.+?)\s*\*\*', re.MULTILINE), self.t_strong),
            (re.compile(r'__\s*(?P<s>.+?)\s*__', re.MULTILINE), self.t_strong),
            (re.compile(r'\*\s*(?P<s>.+?)\s*\*', re.MULTILINE), self.t_emphasize),
            (re.compile(r'_\s*(?P<s>.+?)\s*_', re.MULTILINE), self.t_emphasize),
            (re.compile(r'^######\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h6),
            (re.compile(r'^#####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h5),
            (re.compile(r'^####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h4),
            (re.compile(r'^###\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h3),
            (re.compile(r'^##\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h2),
            (re.compile(r'^#\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h1),
            # NOTE(review): turns any text preceding a leftover backtick into
            # <code>; kept as-is, but this looks unintended - confirm.
            (re.compile(r'(?P<s>.+?)`', re.MULTILINE), self.t_code),
            (re.compile(r'!\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_image),
            (re.compile(r'\[(?P<key>.+?)\]\s*\[(?P<link>.*?)\]'), self.t_ref_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+"(?P<title>.*?)"\s*\)'), self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+\((?P<title>.*?)\)\s*\)'), self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_link),
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)(\s+"(?P<title>.*)")?'), self.t_reference),
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)(\s+\((?P<title>.*)\))?'), self.t_reference),

            # The marker/number must be followed by blanks on the SAME line;
            # letting the separator span a newline produced items without any
            # text (and an IndexError in t_ol).
            (re.compile(r'\n(?P<s>((\*|\-|\+)[ \t]+.+?\n)+)', re.MULTILINE), self.t_ul),
            (re.compile(r'\n(?P<s>(\d+[ \t]+.+?\n)+)', re.MULTILINE), self.t_ol),
            ]

    def _escape(self, text):
        """Return *text* with ``&``, ``<``, ``>`` and ``"`` HTML-escaped."""
        # '&' must go first so the entities produced below are not re-escaped.
        return (text.replace('&', '&amp;').replace('<', '&lt;')
                .replace('>', '&gt;').replace('"', '&quot;'))

    def _item_text(self, line):
        """Return a list line without its leading marker ('-', '1', ...)."""
        parts = line.split(None, 1)
        if len(parts) > 1:
            return parts[1]
        return ''

    def sanitize(self, text):
        """Run *text* through ``sanitizer.sanitize`` allowing a/div/img/span."""
        return sanitizer.sanitize(text, permitted_tags=['a', 'div', 'img', 'span'])

    def t_ignore(self, match):
        """Return the matched text unchanged (shields it from later rules)."""
        return match.group('s')

    def t_newlines(self, match):
        """Return a single newline regardless of the match."""
        return '\n'

    def t_strong(self, match):
        """Render the ``s`` group as ``<strong>``."""
        return '<strong>%s</strong>' % self.sanitize(match.group('s'))

    def t_emphasize(self, match):
        """Render the ``s`` group as ``<em>``."""
        return '<em>%s</em>' % self.sanitize(match.group('s'))

    def t_code(self, match):
        """Render the ``s`` group as inline ``<code>``."""
        return '<code>%s</code>' % self.sanitize(match.group('s'))

    def t_code2(self, match):
        """Render an indented block as ``<pre><code>``.

        The first four characters (the indent) of every line are dropped and
        the content is HTML-escaped, since it is inserted verbatim into the
        final output.
        """
        lines = [x[4:] for x in match.group('s').split('\n')]
        return '<pre><code>' + self._escape('\n'.join(lines)) + '</code></pre>'

    def t_ul(self, match):
        """Render consecutive bullet lines as ``<ul>`` (markers removed)."""
        items = match.group('s').strip().split('\n')
        return '<ul>\n' + \
            '\n'.join(['<li>%s</li>' % self.sanitize(self._item_text(x))
                       for x in items]) + \
            '</ul>'

    def t_ol(self, match):
        """Render consecutive numbered lines as ``<ol>`` (numbers removed)."""
        items = match.group('s').strip().split('\n')
        return '<ol>\n' + \
            '\n'.join(['<li>%s</li>' % self.sanitize(self._item_text(x))
                       for x in items]) + \
            '</ol>'

    def t_h6(self, match):
        """Render the ``s`` group as ``<h6>``."""
        return '<h6>%s</h6>' % self.sanitize(match.group('s'))

    def t_h5(self, match):
        """Render the ``s`` group as ``<h5>``."""
        return '<h5>%s</h5>' % self.sanitize(match.group('s'))

    def t_h4(self, match):
        """Render the ``s`` group as ``<h4>``."""
        return '<h4>%s</h4>' % self.sanitize(match.group('s'))

    def t_h3(self, match):
        """Render the ``s`` group as ``<h3>``."""
        return '<h3>%s</h3>' % self.sanitize(match.group('s'))

    def t_h2(self, match):
        """Render the ``s`` group as ``<h2>``."""
        return '<h2>%s</h2>' % self.sanitize(match.group('s'))

    def t_h1(self, match):
        """Render the ``s`` group as ``<h1>``."""
        return '<h1>%s</h1>' % self.sanitize(match.group('s'))

    def t_hr(self, match):
        """Render a horizontal rule."""
        return '<hr/>'

    def t_image(self, match):
        """Render ``![key](link)`` as ``<img>`` with escaped attributes."""
        return '<img src="%s" alt="%s" />' % (
            self._escape(match.group('link')), self._escape(match.group('key')))

    def t_ref_link(self, match):
        """Render ``[key][link]`` as an anchor to ``#link`` (lower-cased).

        An empty ``link`` falls back to ``key``.
        """
        key = match.group('key')
        link = match.group('link') or key
        return '<a href="#%s">%s</a>' % (self._escape(link.lower()),
                                         self.sanitize(key))

    def t_link(self, match):
        """Render ``[key](link)`` as ``<a>``."""
        # NOTE(review): the URL scheme is not validated here (e.g.
        # "javascript:"); only attribute break-out is prevented.
        return '<a href="%s">%s</a>' % (self._escape(match.group('link')),
                                        self.sanitize(match.group('key')))

    def t_title_link(self, match):
        """Render ``[key](link "title")`` as ``<a>`` with a title attribute."""
        return '<a href="%s" title="%s">%s</a>' % \
            (self._escape(match.group('link')),
             self._escape(match.group('title')),
             self.sanitize(match.group('key')))

    def t_reference(self, match):
        """Render ``[key]: link "title"`` as a named anchor plus ``<br/>``.

        The visible text is the title, or ``key`` when no title was given.
        """
        key = match.group('key')
        title = match.group('title') or key
        return '<a id="%s" href="%s">%s</a><br/>' % (
            self._escape(key.lower()),
            self._escape(match.group('link')),
            self.sanitize(title))

    def convert(self, text):
        """Return the HTML rendering of the Markdown string *text*."""
        substitutions = []
        text = self.regex_cleanup.sub('\n', text + '\n').replace('\t', '    ')
        for (regex, action) in self.actions:
            pos = 0
            while True:
                match = regex.search(text, pos)
                if not match:
                    break
                key = self.pre + str(len(substitutions)) + self.post
                substitutions.append(action(match))
                text = text[:match.start()] + key + text[match.end():]
                # Resume right after the inserted key so it is not re-scanned.
                pos = match.start() + len(key)
        text = self.sanitize(text)
        # Expand newest-first: a later substitution may contain the keys of
        # earlier ones (nested markup), never the other way round.
        for k in range(len(substitutions) - 1, -1, -1):
            key = self.pre + str(k) + self.post
            text = text.replace(key, substitutions[k])
        return text

# Demo: render a sample document touching each supported construct and write
# the resulting HTML to stdout.
_demo_markdown = """
# This
## is
### a test

***
This is a **strong** *emphasized* piece of `code` <div>in a div</div>
and invalid < code.
and this [is a link](http://www.google.com) to something
and this [is a link](http://www.google.com "with title") to something
Here is an image ![image] (http://www.google.com)
This if code:

    for i in range(10):
        print(i)

here is a referene to [Google][1] and to [Yahoo][]

here is a list

- dog
- cat
- mouse

and another

1 dog
2 cat
3 mouse

This mess up with this: xyz2t
- - -
[1]: http://www.google.com
"google"
[Yahoo]: http://www.yahoo.com
"Yahoo"
"""
# A parenthesised single argument prints exactly like the statement form.
print(Markdown().convert(_demo_markdown))

Reply via email to