<link type="text/css" href="styles.css" rel="stylesheet" /> </head> <body><div id="page"><h1><intitle/></h1><content/></div></body></html> ''' def getTemplate(default='template.xhtml'): template = os.environ.get('template') if not template: template = default if os.path.exists(template): with open(template,'rt') as inp: return inp.read() with open(template,'wt') as out: out.write(defaultTemplate) return defaultTemplate if __name__ == '__main__': doc,title = markdown(getTemplate(),sys.argv[1]) if 'pretty' in os.environ: print(doc.prettify()) else: print(str(doc))

#!/usr/bin/env python3
"""Expand a small markdown dialect embedded in HTML and pour it into a template.

Elements carrying an ``md`` attribute (or ``class="author"``) have their
direct children regrouped line by line into ``<p>`` paragraphs and
``<ol>``/``<ul>`` lists.  Inline emphasis markers (``*word*``, ``**word**``,
``/word/``, ``_word_``) are turned into ``<i>``/``<b>`` tags before parsing.
"""
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PreformattedString, Comment, PageElement
import subprocess as s
import os
import sys
import re


def BeautifulSoupSucks(inp):
    """Parse *inp* with the stdlib ``html.parser`` backend."""
    return BeautifulSoup(inp, "html.parser")


nl = re.compile('\n+')
# Raw string: '\s' in a plain literal is an invalid escape sequence.  Inside
# the regex, r'\n' still matches a newline.
NLPAT = r'\s*\n\s*'
nlStart = re.compile('^' + NLPAT + '(.*)', re.DOTALL)
nlEnd = re.compile('(.*?)' + NLPAT + '$', re.DOTALL)


def inline(e):
    """Return True when *e* is one of the tags treated as inline here."""
    return e.name in {'a', 'i', 'b', 'u', 's', 'span', 'q', 'small'}


def needsMD(cur):
    """Tell whether *cur* is marked for markdown expansion.

    An element qualifies when it has an ``md`` attribute or ``author`` in its
    ``class``.  Side effect: the ``md`` attribute is deleted when found, so a
    second call on the same element returns False unless it is an author.
    """
    if not hasattr(cur, 'attrs'):
        return False
    if 'md' in cur.attrs:
        del cur.attrs['md']
        return True
    # also mdify class="author" because I forget that one.
    if 'class' not in cur.attrs:
        return False
    return 'author' in cur.attrs['class']


def depth(e):
    """Count the ancestors of *e*."""
    c = 0
    while e.parent:
        c += 1
        e = e.parent
    return c


# Emphasis marker characters as a character-class body: the backslash only
# escapes '*', it is not a marker itself.
things = "\\*_/"
# Characters that may surround an emphasised word.
outer = '\t\n .?!“”'
# Negated class body for the emphasised text: anything except markers and
# `outer`, with ".?!" allowed.  The characters are sorted and escaped so the
# pattern is the same on every run; joining a raw set could leave the
# backslash last, where it would escape the closing ']' and break compilation.
# The backslash is left out of the excluded set: it never excluded itself in
# the original either, it only escaped its neighbour.
inner = '^' + "".join(
    re.escape(ch) for ch in sorted(set(things + outer) - set(".?!\\"))
)
# Groups: leading outer char, marker run (named "head" so the closing marker
# must repeat it exactly), emphasised text, trailing outer char.
md = re.compile(
    "([" + outer + "])(?P<head>[" + things + "]+)([" + inner + "]+)(?P=head)([" + outer + "])"
)


def realtags(m):
    """Turn one ``md`` match into real markup (``re.sub`` callback).

    ``**`` becomes ``<b>``; a single ``*``, ``/`` or ``_`` becomes ``<i>``.
    The surrounding characters captured by the pattern are preserved.

    Raises:
        RuntimeError: for any other marker run (for example ``__``).
    """
    try:
        head, mod, contents, tail = m.groups()
    except ValueError:
        print("uhhh", m.groups())
        raise
    if mod == '**':
        tag = 'b'
    elif mod in {'*', '/', '_'}:
        tag = 'i'
    else:
        raise RuntimeError("huh?", mod)
    # Emit the closing tag as well; without it the element is never closed.
    return head + '<' + tag + '>' + contents + '</' + tag + '>' + tail


def get(source, stdin=False):
    """Load markdown-flavoured HTML and parse it.

    Args:
        source: the text itself when *stdin* is true, otherwise a file path.
        stdin: selects how *source* is interpreted.

    Returns:
        The parsed document, with emphasis markers already converted to tags.
    """
    if stdin:
        text = source
    else:
        with open(source) as inp:
            text = inp.read()
    # NOTE(review): the original concatenated empty strings around the text
    # here, which looks like wrapper markup lost from the literals (possibly
    # a <body> wrapper) -- confirm.  As written it is a no-op, so it is left
    # out rather than guessed at.
    return BeautifulSoupSucks(md.sub(realtags, text))


def fuckyou(e):
    """Debug helper: repr of the first 20 chars of *e* with newlines/tabs
    collapsed to single spaces."""
    space = re.compile("[\n\t]+")
    return repr(space.sub(' ', str(e))[:20])


def process(doc):
    """Expand markdown in every marked element of *doc*, in place.

    Walks the tree below ``<body>`` deepest-first.  Each element for which
    ``needsMD`` is true, and ``<body>`` itself, has its children regrouped
    into paragraphs and lists.  Nothing is expanded when *doc* has no
    ``<body>``.  Returns *doc*.
    """
    # Tags that continue the current line instead of starting a new group.
    # Deliberately kept separate from inline(): the two sets differ.
    joins_line = {'i', 'b', 'span', 'u', 's', 'em', 'a'}

    # 1) yield each line, plus elements in between
    # 2) group those into newline delineated (before,e,after) lists
    # 3) act on those lists
    def lines(e):
        """Detach *e* and all following siblings; yield them as line groups.

        A group is a non-empty list of plain strings (one text line each),
        comments and tags that belong to the same source line.
        """
        group = []

        def commit():
            nonlocal group
            done, group = group, []
            return done

        # Detach the whole sibling run before yielding anything, because the
        # consumer appends new nodes to the same parent while iterating.
        siblings = []
        while e:
            following = e.next_sibling
            siblings.append(e)
            e.extract()
            e = following

        for e in siblings:
            # if e is text, yield lines
            # otherwise, yield element
            if isinstance(e, Comment):
                group.append(e)
            elif isinstance(e, NavigableString):
                m = nlStart.match(e)
                if m and m.group(1):
                    # The text opens with a newline: close the pending line.
                    if group:
                        yield commit()
                    e = m.group(1)
                parts = nl.split(e)
                end_newline = parts[-1] == ""
                for line in parts[:-1]:
                    # if not nlStart, group won't be empty!
                    if not line:
                        continue
                    group.append(line)
                    yield commit()
                if parts[-1]:
                    group.append(parts[-1])
                # A trailing newline ends the line.  Empty groups are never
                # yielded: the original's filter tested the list instead of
                # the element, and the empty group it let through crashed
                # mdify once a list was open.
                if end_newline and group:
                    yield commit()
            else:
                if group and e.name not in joins_line:
                    yield commit()
                group.append(e)
        if group:
            yield commit()

    def mdify(cur):
        """Regroup the children of *cur* into paragraphs and lists."""
        if not cur.contents:
            return
        cur_list = None

        def add_item(target, group):
            # Drop the two-character marker from the leading text.
            li = doc.new_tag('li')
            li.append(group[0][2:])
            for node in group[1:]:
                li.append(node)
            target.append(li)

        def add_paragraph(group):
            # A lone element is re-attached as is, not wrapped in <p>.
            if len(group) == 1 and not isinstance(group[0], str):
                cur.append(group[0])
                return
            p = doc.new_tag('p')
            has_content = any(group)
            for node in group:
                p.append(node)
            if has_content:
                cur.append(p)

        for group in lines(cur.contents[0]):
            lead = group[0]
            if isinstance(lead, str) and lead.startswith(('- ', '* ')):
                if cur_list is None:
                    # The first marker picks the list type: '- ' opens <ol>,
                    # '* ' opens <ul>.
                    cur_list = doc.new_tag('ol' if lead.startswith('- ') else 'ul')
                    cur.append(cur_list)
                add_item(cur_list, group)
            else:
                # Any non-item group closes the open list and is rendered
                # normally.  Previously some of these groups were extracted
                # from the document and then never re-appended.
                cur_list = None
                add_paragraph(group)

    cur = doc.find('body')
    # Skip down through single-child wrappers to where the content starts.
    while hasattr(cur, 'contents') and len(cur.contents) == 1:
        cur = cur.contents[0]

    # stackless traversal, starting depth first and
    # going up from there.
    # descend into the deepest contents first
    # then check if it's md and replace if so
    # then go to the next sibling if exists
    # otherwise go up to the parent if exist
    # otherwise, done!
    def mdifyAll():
        nonlocal cur
        while cur:
            if hasattr(cur, 'contents') and len(cur.contents):
                cur = cur.contents[0]
                continue
            if needsMD(cur):
                mdify(cur)
            if cur.next_sibling:
                cur = cur.next_sibling
            elif cur.parent:
                # like the outer loop, but w/out going down contents
                # go up the parents until we find a sibling
                cur = cur.parent
                while cur:
                    # Read the sibling first: mdify rebuilds cur's children.
                    nextcur = cur.next_sibling
                    if cur.name == 'body':
                        # <body> is always expanded and ends the walk.
                        mdify(cur)
                        return
                    elif needsMD(cur):
                        mdify(cur)
                    if nextcur:
                        cur = nextcur
                        break
                    else:
                        cur = cur.parent
            else:
                break

    mdifyAll()
    return doc


def markdown(template, source, defaultTitle=None):
    """Render the file *source* into *template*.

    Args:
        template: template markup.  It must contain a ``<content/>``
            placeholder; ``<title>`` and ``<intitle/>`` are filled when
            present.
        source: path of the markdown-flavoured HTML file.
        defaultTitle: title to use when the source has no ``<title>``.

    Returns:
        ``(document, title)``: the filled template and the title string, or
        the falsy *defaultTitle* when there is no title.
    """
    tmpl = BeautifulSoupSucks(template)
    mk = get(source, False)

    title = mk.find('title') or defaultTitle
    if title:
        if hasattr(title, 'extract'):
            # A parsed node: take it out of the body and use its text.
            title.extract()
            title = str(title.string)
        else:
            # A plain-string defaultTitle has no .extract()/.string.
            title = str(title)
        title_slot = tmpl.find('title')
        if title_slot is not None:
            title_slot.append(title)
        heading_slot = tmpl.find('intitle')
        if heading_slot is not None:
            heading_slot.replace_with(title)

    mk = process(mk)
    body = mk.find('body')
    container = mk if body is None else body
    content = tmpl.find('content')
    # Move the children last-to-first so that inserting each one directly
    # after the placeholder leaves them in document order.  Iterating a
    # snapshot also terminates when there is no <body>; the original looped
    # forever on a one-element list that extract() never shrank.
    for node in reversed(list(container.contents)):
        node.extract()
        content.insert_after(node)
    content.decompose()
    return tmpl, title


# NOTE(review): the head of this template looks truncated (no doctype,
# <html>, <head> or <title> before the <link>) -- confirm against the
# original file.  The text is reproduced exactly as found.
defaultTemplate = ''' <link type="text/css" href="styles.css" rel="stylesheet" /> </head> <body><div id="page"><h1><intitle/></h1><content/></div></body></html> '''


def getTemplate(default='template.xhtml'):
    """Return the page template text, creating the file when it is missing.

    The path is taken from the ``template`` environment variable; when that
    is unset or empty, *default* is used.  If no file exists at that path,
    ``defaultTemplate`` is written there and returned.
    """
    template = os.environ.get('template')
    if not template:
        template = default
    if os.path.exists(template):
        with open(template, 'rt') as inp:
            return inp.read()
    with open(template, 'wt') as out:
        out.write(defaultTemplate)
    return defaultTemplate


if __name__ == '__main__':
    # Usage: script SOURCE_FILE; set "pretty" in the environment for
    # indented output.
    doc, title = markdown(getTemplate(), sys.argv[1])
    if 'pretty' in os.environ:
        print(doc.prettify())
    else:
        print(str(doc))