#!/usr/bin/env python3

from bs4 import BeautifulSoup
from bs4.element import NavigableString,PreformattedString,Comment, PageElement
import subprocess as s
import os,sys,re

def BeautifulSoupSucks(inp):
	"""Parse *inp* into a BeautifulSoup document using the "html.parser" backend."""
	parser = "html.parser"
	return BeautifulSoup(inp, parser)

# One or more consecutive newlines; process() splits text nodes on this.
nl = re.compile('\n+')

# A newline together with any whitespace on either side of it.  The
# backslashes in front of "s" are doubled so the regex engine receives "\s"
# without depending on Python letting an unrecognised string escape through
# (newer Pythons emit a warning for that).  The resulting string is the same.
NLPAT = '\\s*\n\\s*'
# Text that starts with a whitespace-padded newline; group 1 is the rest.
nlStart = re.compile('^'+NLPAT+'(.*)',re.DOTALL)
# Text that ends with a whitespace-padded newline; group 1 is what precedes it.
nlEnd = re.compile('(.*?)'+NLPAT+'$',re.DOTALL)

def inline(e):
	"""Tell whether the tag name of *e* is one of the inline names listed here."""
	inline_names = {'a','i','b','u','s','span','q','small'}
	return e.name in inline_names

def needsMD(cur):
	"""Decide whether *cur* should get the markdown treatment.

	Objects without an ``attrs`` mapping never qualify.  An ``md`` attribute
	qualifies the element and is removed from it as a side effect.  Failing
	that, the element qualifies when its ``class`` value contains 'author'.
	"""
	try:
		attrs = cur.attrs
	except AttributeError:
		return False
	if 'md' in attrs:
		# The marker has done its job; strip it from the element.
		del attrs['md']
		return True
	# also mdify class="author" because I forget that one.
	return 'class' in attrs and 'author' in attrs['class']

def depth(e):
	"""Count how many ancestors *e* has by following ``.parent`` links."""
	levels = 0
	ancestor = e.parent
	while ancestor:
		levels += 1
		ancestor = ancestor.parent
	return levels

# Characters that can make up an emphasis marker.  This string is spliced
# into a regex character class, where the leading backslash only escapes the
# "*"; the marker characters themselves are "*", "_" and "/".
things = "\\*_/"
# Characters allowed immediately before and after an emphasised span.
outer = '\t\n .?!“”'
# Body of a negated character class for the emphasised text: everything
# except the marker and boundary characters, with ".", "?" and "!" allowed.
# The class is built in sorted order with every character escaped.  Joining
# the raw set directly gave a different order on each run, and whenever the
# backslash from `things` came out last it escaped the closing "]" and
# changed the number of groups in `md`.  The backslash is left out here
# because, as in `things`, it never acted as an excluded character itself.
inner = '^' + "".join(
	re.escape(c)
	for c in sorted(set(things + outer) - set(".?!") - {"\\"})
)

# Groups: leading boundary, marker ("head"), emphasised text, trailing boundary.
md = re.compile("(["+outer+"])(?P<head>["+things+"]+)(["+inner+"]+)(?P=head)(["+outer+"])")

def realtags(m):
	"""Turn one emphasis match into real markup.

	*m* must carry four groups: leading text, marker, emphasised text and
	trailing text.  '**' becomes <b>; '*', '/' and '_' become <i>.  Any other
	marker raises RuntimeError.  If the match does not have four groups, the
	groups are printed and the ValueError propagates.
	"""
	try:
		head,mod,contents,tail = m.groups()
	except ValueError:
		print("uhhh", m.groups())
		raise
	tag = {'**': 'b', '*': 'i', '/': 'i', '_': 'i'}.get(mod)
	if tag is None:
		raise RuntimeError("huh?",mod)
	return "".join([head, '<', tag, '>', contents, '</', tag, '>', tail])
	

def get(source,stdin=False):
	"""Load markup, convert emphasis markers to tags, and parse it.

	source: a file path, or the markup text itself when *stdin* is true.
	stdin: selects how *source* is interpreted (see above).

	The text is wrapped in a <body> element before parsing; the parsed
	document is returned.
	"""
	if stdin:
		text = source
	else:
		with open(source) as inp:
			text = inp.read()
	tagged = md.sub(realtags, text)
	return BeautifulSoupSucks('<body>' + tagged + '</body>')

def fuckyou(e):
	"""Return the repr of the first 20 characters of str(e), after collapsing
	each run of newlines/tabs into a single space."""
	flattened = re.sub("[\n\t]+", ' ', str(e))
	return repr(flattened[:20])

def process(doc):
	"""Rewrite the markdown-flagged parts of *doc* in place and return *doc*.

	The tree is walked depth first, starting below <body>.  Each element for
	which needsMD() is true -- and <body> itself, once the walk climbs back up
	to it -- has its direct children regrouped by mdify(): newline-separated
	runs of content become <p> elements, and runs whose text starts with
	"- " or "* " become <li> items inside an <ol> or <ul> respectively.
	"""

	# 1) yield each line, plus elements in between
	# 2) group those into newline delineated (before,e,after) lists
	# 3) act on those lists

	def lines(e):
		"""Detach *e* and all of its following siblings, then yield them
		back as groups (lists) of strings and elements, one group per line."""
		group = []
		def commit():
			# Hand back the group built so far and start a fresh one.
			nonlocal group
			ret = group
			group = []
			return ret
		# Detach everything up front so callers can append freely to the
		# parent while consuming the groups.
		es = []
		while e:
			nexte = e.next_sibling
			es.append(e)
			e.extract()
			e = nexte
		for e in es:
			# if e is text, yield lines
			# otherwise, yield element
			if isinstance(e,Comment):
				# Checked before NavigableString, so comments always stay
				# attached to the current group.
				group.append(e)
			elif isinstance(e,NavigableString):
				m = nlStart.match(e)
				if m and m.group(1):
					# Text that opens with a newline closes the pending group.
					if group:
						yield commit()
					e = m.group(1)

				parts = nl.split(e)
				end_newline = parts[-1] == ""
				parts = [part for part in parts if part]
				if not parts:
					continue
				# Every line but the last is complete, so it ends its group.
				for part in parts[:-1]:
					group.append(part)
					yield commit()
				# The last line ends its group only if a newline followed it;
				# otherwise later siblings may still join the same group.
				group.append(parts[-1])
				if end_newline:
					yield commit()
			else:
				# Any tag not named here closes the pending group before
				# being added.  (This set is kept separately from the
				# module-level inline() helper.)
				if group and e.name not in {'i','b','span','u','s','em','a'}:
					yield commit()
				group.append(e)
		if group:
			yield commit()

	def mdify(cur):
		"""Replace the children of *cur* with paragraphs and lists built
		from the groups that lines() produces."""
		if not cur.contents:
			return
		cur_list = None

		def add_list_item(group):
			# Drop the two-character "- " / "* " marker from the first string.
			li = doc.new_tag('li')
			li.append(group[0][2:])
			for e in group[1:]:
				li.append(e)
			cur_list.append(li)

		def add_paragraph(group):
			# A lone element goes back as-is, without a <p> wrapper.
			if len(group) == 1 and not isinstance(group[0],str):
				cur.append(group[0])
				return
			p = doc.new_tag('p')
			gotsome = False
			for e in group:
				gotsome = gotsome or e
				p.append(e)
			# Skip paragraphs in which every member is empty.
			if gotsome:
				cur.append(p)

		for group in lines(cur.contents[0]):
			first = group[0]
			if isinstance(first,str) and first.startswith(('- ','* ')):
				if cur_list is None:
					# The first item decides the kind of list: "- " opens
					# an <ol>, "* " opens a <ul>.
					cur_list = doc.new_tag('ol' if first.startswith('- ') else 'ul')
					cur.append(cur_list)
				add_list_item(group)
			else:
				# Anything that is not a list item ends the open list and is
				# kept as ordinary content.  This includes groups that begin
				# with an element: lines() has already detached them from the
				# tree, so skipping them would lose them.
				cur_list = None
				add_paragraph(group)

	cur = doc.find('body')
	# Skip down through wrappers that have exactly one child.
	while hasattr(cur,'contents') and len(cur.contents) == 1:
		cur = cur.contents[0]

	# stackless traversal, starting depth first and
	# going up from there.

	# descend into the deepest contents first
	# then check if it's md and replace if so
	# then go to the next sibling if exists
	# otherwise go up to the parent if exist
	# otherwise, done!
	def mdifyAll():
		nonlocal cur
		while cur:
			if hasattr(cur,'contents') and len(cur.contents):
				cur = cur.contents[0]
				continue
			if needsMD(cur):
				mdify(cur)
			if cur.next_sibling:
				cur = cur.next_sibling
			elif cur.parent:
				# like the outer loop, but w/out going down contents
				# go up the parents until we find a sibling
				cur = cur.parent
				while cur:
					# Read the sibling before mdify() rearranges anything.
					nextcur = cur.next_sibling
					if cur.name == 'body':
						# <body> is always processed, and ends the walk.
						mdify(cur)
						return
					elif needsMD(cur):
						mdify(cur)
					if nextcur:
						cur = nextcur
						break
					else:
						cur = cur.parent
			else:
				break
	mdifyAll()
	return doc

def markdown(template,source,defaultTitle=None):
	"""Render the input file *source* into the page described by *template*.

	template: template markup as a string.  It must contain a <content>
		element, which is replaced by the converted body.  A <title> element
		receives the title text and an <intitle> element is replaced by it;
		either may be absent.
	source: path of the input file (handed to get()).
	defaultTitle: title text used when the input has no <title> element, or
		its <title> has no single text value.

	Returns (tmpl, title): the filled-in template document and the title
	string, or None when no title was found.
	"""
	tmpl = BeautifulSoupSucks(template)
	mk = get(source,False)

	# A <title> in the input wins and is removed from the body.  Otherwise
	# fall back to defaultTitle, which is plain text rather than a tag and so
	# must not be treated like one.
	title = None
	title_tag = mk.find('title')
	if title_tag is not None:
		title_tag.extract()
		if title_tag.string is not None:
			title = str(title_tag.string)
	if title is None and defaultTitle:
		title = str(defaultTitle)
	if title:
		title_slot = tmpl.find('title')
		if title_slot is not None:
			title_slot.append(title)
		heading_slot = tmpl.find('intitle')
		if heading_slot is not None:
			heading_slot.replace_with(title)

	mk = process(mk)
	bod = mk.find('body')
	# Work from a snapshot: extract() shrinks body.contents as it goes, and
	# it never shrinks the single-item fallback list.
	nodes = list(bod.contents) if bod is not None else [mk]
	content = tmpl.find('content')
	# Each node is inserted directly after the placeholder, so going
	# last-to-first leaves them in their original order.
	for node in reversed(nodes):
		node.extract()
		content.insert_after(node)
	content.decompose()
	return tmpl,title

# Fallback page skeleton; getTemplate() writes it to disk and returns it when
# no template file exists.  markdown() looks up the <title>, <intitle/> and
# <content/> elements of a template to place the title text and the
# converted body.
defaultTemplate = '''<!DOCTYPE html>
<html>
  <head><meta charset="utf-8">
  <title/><link type="text/css" href="styles.css" rel="stylesheet" />
  </head>
<body><div id="page"><h1><intitle/></h1><content/></div></body></html>
'''

def getTemplate(default='template.xhtml'):
	"""Return the template markup as a string.

	The path comes from the ``template`` environment variable, or *default*
	when that is unset or empty.  If nothing exists at that path,
	defaultTemplate is written there and returned.
	"""
	path = os.environ.get('template') or default
	if not os.path.exists(path):
		# First run: leave a copy on disk for later use.
		with open(path,'wt') as out:
			out.write(defaultTemplate)
		return defaultTemplate
	with open(path,'rt') as inp:
		return inp.read()


if __name__ == '__main__':
	# Convert the file named by the first argument and print the page.
	# A "pretty" variable in the environment selects indented output.
	doc,title = markdown(getTemplate(),sys.argv[1])
	rendered = doc.prettify() if 'pretty' in os.environ else str(doc)
	print(rendered)
