This is what I have so far. It is not really a priority of mine. Implementing tables may be beyond what I was planning to do.
-- You received this message because you are subscribed to the Google Groups "web2py-users" group. To post to this group, send email to web...@googlegroups.com. To unsubscribe from this group, send email to web2py+unsubscr...@googlegroups.com. For more options, visit this group at http://groups.google.com/group/web2py?hl=en.
import re
import cgi
import sanitizer


class Markdown:
    """Small regex-driven Markdown-to-HTML converter (tables are not handled).

    ``convert`` walks ``self.actions`` in order.  Every match of a rule is
    rendered by its handler, stored, and replaced in the text by a
    placeholder of the form ``<pre><index><post>``.  Once all rules have run,
    the remaining text is sanitized and the placeholders are expanded again.

    NOTE(review): rule order is significant (for example, block code runs
    before inline code and ``**`` before ``*``); it is kept exactly as the
    original author listed it.
    """

    # Leading list markers, removed from an item before it is wrapped in <li>.
    _ul_marker = re.compile(r'^[*+-]\s+')
    _ol_marker = re.compile(r'^\d+\s+')

    def __init__(self):
        # Markers that surround the numeric index of a stored substitution.
        self.pre = 'filu'
        self.post = 'fila'
        # Collapses trailing whitespace and blank lines into a single newline.
        self.regex_cleanup = re.compile(r'\s*\n(\s*(\n|\r))?', re.MULTILINE)
        # Literal input that already looks like a placeholder is captured
        # first so it is restored verbatim instead of being mistaken for a
        # stored substitution.  The pattern is built from the markers so the
        # two cannot drift apart.
        placeholder = r'(?P<s>%s\d+%s)' % (re.escape(self.pre),
                                           re.escape(self.post))
        self.actions = [
            (re.compile(placeholder), self.t_ignore),
            # Indented code block: four leading spaces, matching the four
            # characters that t_code2 strips from every line.
            (re.compile(r'\n(?P<s>(    .*?\n)+)', re.MULTILINE), self.t_code2),
            (re.compile(r'`\s*(?P<s>.+?)\s*`', re.MULTILINE), self.t_code),
            (re.compile(r'((\*|\-)\s*){3}((\*|\-)\s*)*', re.MULTILINE), self.t_hr),
            (re.compile(r'\*\*\s*(?P<s>.+?)\s*\*\*', re.MULTILINE), self.t_strong),
            (re.compile(r'__\s*(?P<s>.+?)\s*__', re.MULTILINE), self.t_strong),
            (re.compile(r'\*\s*(?P<s>.+?)\s*\*', re.MULTILINE), self.t_emphasize),
            (re.compile(r'_\s*(?P<s>.+?)\s*_', re.MULTILINE), self.t_emphasize),
            # Longest header prefix first so '######' is not taken for '#'.
            (re.compile(r'^######\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h6),
            (re.compile(r'^#####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h5),
            (re.compile(r'^####\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h4),
            (re.compile(r'^###\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h3),
            (re.compile(r'^##\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h2),
            (re.compile(r'^#\s*(?P<s>.+?)\n', re.MULTILINE), self.t_h1),
            # NOTE(review): turns any text followed by a leftover backtick
            # into code; kept unchanged, intent unclear -- confirm.
            (re.compile(r'(?P<s>.+?)`', re.MULTILINE), self.t_code),
            (re.compile(r'!\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_image),
            (re.compile(r'\[(?P<key>.+?)\]\s*\[(?P<link>.*?)\]'), self.t_ref_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+"(?P<title>.*?)"\s*\)'),
             self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\s+\((?P<title>.*?)\)\s*\)'),
             self.t_title_link),
            (re.compile(r'\[(?P<key>.+?)\]\s*\((?P<link>.+?)\)'), self.t_link),
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)(\s+"(?P<title>.*)")?'),
             self.t_reference),
            (re.compile(r'\[(?P<key>\w+?)\]\:\s+(?P<link>\S+)(\s+\((?P<title>.*)\))?'),
             self.t_reference),
            (re.compile(r'\n(?P<s>((\*|\-|\+)\s+.+?\n)+)', re.MULTILINE), self.t_ul),
            (re.compile(r'\n(?P<s>(\d+\s+.+?\n)+)', re.MULTILINE), self.t_ol),
        ]

    def sanitize(self, text):
        """Pass ``text`` through ``sanitizer.sanitize``.

        Only the ``a``, ``div``, ``img`` and ``span`` tags are permitted; how
        other markup is treated is decided by the external ``sanitizer``
        module.
        """
        return sanitizer.sanitize(text, permitted_tags=['a', 'div', 'img', 'span'])

    def _heading(self, level, match):
        """Render group ``s`` of ``match`` as an ``<hN>`` element."""
        return '<h%d>%s</h%d>' % (level, self.sanitize(match.group('s')), level)

    def t_ignore(self, match):
        """Return the matched text untouched (placeholder look-alikes)."""
        return match.group('s')

    def t_newlines(self, match):
        """Return a single newline; not referenced by any rule in ``actions``."""
        return '\n'

    def t_strong(self, match):
        """Render ``**text**`` / ``__text__`` as ``<strong>``."""
        return '<strong>%s</strong>' % self.sanitize(match.group('s'))

    def t_emphasize(self, match):
        """Render ``*text*`` / ``_text_`` as ``<em>``."""
        return '<em>%s</em>' % self.sanitize(match.group('s'))

    def t_code(self, match):
        """Render inline code as ``<code>``."""
        return '<code>%s</code>' % self.sanitize(match.group('s'))

    def t_code2(self, match):
        """Render an indented block as ``<pre><code>``.

        The four-character indent is removed from every line.  The content is
        sanitized like every other text handler, because substitutions are
        inserted after ``convert`` has already sanitized the surrounding text.
        """
        body = '\n'.join([line[4:] for line in match.group('s').split('\n')])
        return '<pre><code>' + self.sanitize(body) + '</code></pre>'

    def t_ul(self, match):
        """Render consecutive ``*``/``-``/``+`` lines as a ``<ul>``."""
        rows = match.group('s').strip().split('\n')
        items = ['<li>%s</li>' % self.sanitize(self._ul_marker.sub('', row))
                 for row in rows]
        return '<ul>\n' + '\n'.join(items) + '</ul>'

    def t_ol(self, match):
        """Render consecutive ``<number> text`` lines as an ``<ol>``.

        The number is removed with an anchored pattern rather than by
        splitting on a space, so a matched line that carries no space (the
        rule's ``\\s+`` may span a line break) is kept instead of raising
        ``IndexError``.
        """
        rows = match.group('s').strip().split('\n')
        items = ['<li>%s</li>' % self.sanitize(self._ol_marker.sub('', row))
                 for row in rows]
        return '<ol>\n' + '\n'.join(items) + '</ol>'

    def t_h6(self, match):
        """Render a ``######`` header."""
        return self._heading(6, match)

    def t_h5(self, match):
        """Render a ``#####`` header."""
        return self._heading(5, match)

    def t_h4(self, match):
        """Render a ``####`` header."""
        return self._heading(4, match)

    def t_h3(self, match):
        """Render a ``###`` header."""
        return self._heading(3, match)

    def t_h2(self, match):
        """Render a ``##`` header."""
        return self._heading(2, match)

    def t_h1(self, match):
        """Render a ``#`` header."""
        return self._heading(1, match)

    def t_hr(self, match):
        """Render a run of three or more ``*``/``-`` as a horizontal rule."""
        return '<hr/>'

    # NOTE(review): the link and image handlers below interpolate the matched
    # URL, title and text as-is, and their output is inserted after the final
    # sanitize pass.  Escaping them should be considered for untrusted input.

    def t_image(self, match):
        """Render ``![alt](src)`` as an ``<img>`` element."""
        return '<img src="%s" alt="%s" />' % (match.group('link'), match.group('key'))

    def t_ref_link(self, match):
        """Render ``[text][ref]`` as a link to the in-page anchor ``#ref``.

        An empty ``ref`` falls back to the link text; the anchor name is
        lower-cased.
        """
        key = match.group('key')
        link = match.group('link') or key
        return '<a href="#%s">%s</a>' % (link.lower(), key)

    def t_link(self, match):
        """Render ``[text](url)`` as an ``<a>`` element."""
        return '<a href="%s">%s</a>' % (match.group('link'), match.group('key'))

    def t_title_link(self, match):
        """Render ``[text](url "title")`` as an ``<a>`` with a ``title``."""
        return '<a href="%s" title="%s">%s</a>' % (
            match.group('link'), match.group('title'), match.group('key'))

    def t_reference(self, match):
        """Render a ``[ref]: url "title"`` definition as an anchored link.

        The anchor id is the lower-cased reference name; the link text is the
        title, or the reference name when no title was given.
        """
        key = match.group('key')
        title = match.group('title') or key
        return '<a id="%s" href="%s">%s</a><br/>' % (
            key.lower(), match.group('link'), title)

    def convert(self, text):
        """Convert Markdown ``text`` to HTML and return it.

        Blank lines are collapsed and tabs expanded to four spaces (so that
        tab-indented code reaches the indented-code rule) before the rules
        run.
        """
        substitutions = []
        text = self.regex_cleanup.sub('\n', text + '\n').replace('\t', '    ')
        for regex, action in self.actions:
            pos = 0
            while True:
                match = regex.search(text, pos)
                if not match:
                    break
                key = self.pre + str(len(substitutions)) + self.post
                substitutions.append(action(match))
                text = text[:match.start()] + key + text[match.end():]
                # Resume after the placeholder so it is not matched again
                # by the same rule.
                pos = match.start() + len(key)
        text = self.sanitize(text)
        # Expand newest first: a later substitution may contain placeholders
        # of earlier ones, which are then expanded in a following iteration.
        for k in range(len(substitutions) - 1, -1, -1):
            key = self.pre + str(k) + self.post
            text = text.replace(key, substitutions[k])
        return text


# Demo: convert a sample document and print the resulting HTML.
print(Markdown().convert("""
# This
## is
### a test
***
This is a **strong** *emphasized* piece of `code`
<div>in a div</div> and invalid < code.
and this [is a link](http://www.google.com) to something
and this [is a link](http://www.google.com "with title") to something
Here is an image ![image] (http://www.google.com)
This if code:
    for i in range(10):
        print(i)
here is a referene to [Google][1] and to [Yahoo][]
here is a list
- dog
- cat
- mouse
and another
1 dog
2 cat
3 mouse
This mess up with this: xyz2t
- - -
[1]: http://www.google.com "google"
[Yahoo]: http://www.yahoo.com "Yahoo"
"""))