"""Build the static site under ``output`` from the sources under ``input``.

Every file found below ``input`` is handled according to its extension:

* ``.html`` / ``.shtml`` pages are merged into ``template.html``;
* ``.xml`` files containing an ``index`` child are rendered as an HTML
  index page;
* ``.xml`` files containing an ``rss`` child are completed as a feed
  (title, guid, description per item) and written out;
* everything else is copied verbatim.

NOTE(review): paths are manipulated with literal backslashes, so the script
presumably only works with Windows-style paths -- confirm before running it
elsewhere.
"""
import copy
import os
import shutil
import time

import amara

xhtml_ns = u'http://www.w3.org/1999/xhtml'
namespaces = { u'xhtml': xhtml_ns }

# Directory against which links found in index and feed XML files are
# resolved when the referenced page has to be opened.
siteInputDir = 'input\\thinkpond.org'


def ensureDirExists(dir):
    """Create directory ``dir`` (and any missing parents) if it is absent."""
    if not os.path.exists(dir):
        os.makedirs(dir)


ensureDirExists('output')
# Parsed once at start-up; every generated page starts from a deep copy.
# The prefix map is passed by keyword, the same way buildFeed passes it.
templateDoc = amara.parse('template.html', prefixes=namespaces)


def getOutputPath(path):
    """Return ``path`` with each ``input\\`` occurrence replaced by ``output\\``."""
    return path.replace('input\\', 'output\\')


def processDir(dir):
    """Walk ``dir`` recursively and produce the matching output for each file.

    HTML pages are templated, index/feed XML files are built, and any other
    file (including XML files that are neither an index nor a feed) is copied
    unchanged to the corresponding output path.
    """
    for root, dirs, files in os.walk(dir):
        # Only directories that actually contain files need an output
        # counterpart; checking once per directory is enough.
        if files:
            ensureDirExists(getOutputPath(root))
        for fileName in files:
            path = os.path.join(root, fileName)
            ext = os.path.splitext(fileName)[1]
            if ext in ('.html', '.shtml'):
                applyTemplateToFile(path)
                continue
            if ext == '.xml':
                xmlDoc = amara.parse(path)
                rootElements = xmlDoc.xml_child_elements
                if u'index' in rootElements:
                    buildIndex(xmlDoc, path)
                    continue
                if u'rss' in rootElements:
                    buildFeed(xmlDoc, path)
                    continue
            # Fallback for every file not handled above.
            print('Copying %s to output dir' % path)
            shutil.copyfile(path, getOutputPath(path))


def writeOutputXml(path, doc):
    """Serialize ``doc`` as ISO-8859-1 XML to the output location of ``path``.

    ``path`` is an input-side path; it is mapped with getOutputPath() first.
    """
    outputFile = open(getOutputPath(path), 'w')
    try:
        outputFile.write(doc.xml(encoding='iso-8859-1'))
    finally:
        # Close the handle even when serialization or the write fails.
        outputFile.close()


def fixTemplatePaths(doc, path):
    """Prefix relative ``href``/``src`` attributes in ``doc`` with ``../``.

    One ``../`` is added for every backslash in ``path`` beyond the first
    two. Attribute values containing ``//`` are left untouched.
    """
    # assumes paths look like input\<site>\..., so two separators mean the
    # page sits at the site root -- TODO confirm
    numSlashes = path.count('\\') - 2
    prefix = unicode('../' * numSlashes)
    for element in doc.xml_xpath('//*[@href]'):
        if u'//' not in element.href:
            element.href = prefix + unicode(element.href)
    for element in doc.xml_xpath('//*[@src]'):
        if u'//' not in element.src:
            element.src = prefix + unicode(element.src)


def applyTemplateToFile(path):
    """Merge the page at ``path`` into a copy of the template and write it.

    The page's head children are appended to the template head; a
    ``div#content`` is appended to the template body, holding an ``h1`` with
    the page title followed by the page's body children.
    """
    print('Applying template to %s' % path)
    outputDoc = copy.deepcopy(templateDoc)
    fixTemplatePaths(outputDoc, path)
    pageDoc = amara.parse(path)

    # Children are taken from deep copies so pageDoc itself is not modified.
    for headElement in copy.deepcopy(pageDoc.html.head).xml_children:
        outputDoc.html.head.xml_append(headElement)

    contentDiv = outputDoc.xml_create_element(
        u'div', xhtml_ns, attributes={u'id': u'content'})
    outputDoc.html.body.xml_append(contentDiv)
    heading1 = outputDoc.xml_create_element(
        u'h1', xhtml_ns, content=unicode(pageDoc.html.head.title))
    contentDiv.xml_append(heading1)

    for bodyElement in copy.deepcopy(pageDoc.html.body).xml_children:
        contentDiv.xml_append(bodyElement)

    writeOutputXml(path, outputDoc)


def buildIndex(indexDoc, path):
    """Render the index description ``indexDoc`` as an HTML page.

    Each ``section`` becomes an ``h2`` followed by a ``ul`` with one
    ``li > a`` per ``link``. Links to ``.html``/``.shtml`` pages are labelled
    with the referenced page's title; other links with their base file name.
    The result is written next to ``path`` with the extension replaced by
    ``.html``.
    """
    print('Building index %s' % path)
    outputDoc = copy.deepcopy(templateDoc)
    fixTemplatePaths(outputDoc, path)

    indexTitle = unicode(indexDoc.index.title)
    # Created in the XHTML namespace like every other generated element.
    outputDoc.html.head.xml_append(
        outputDoc.xml_create_element(u'title', xhtml_ns, content=indexTitle))

    contentDiv = outputDoc.xml_create_element(
        u'div', xhtml_ns, attributes={u'id': u'content'})
    outputDoc.html.body.xml_append(contentDiv)
    heading1 = outputDoc.xml_create_element(
        u'h1', xhtml_ns, content=indexTitle)
    contentDiv.xml_append(heading1)

    for section in indexDoc.index.sections.xml_xpath('section'):
        heading2 = outputDoc.xml_create_element(
            u'h2', xhtml_ns, content=unicode(section.name))
        contentDiv.xml_append(heading2)
        ulElement = outputDoc.xml_create_element(u'ul', xhtml_ns)
        contentDiv.xml_append(ulElement)

        for link in section.xml_xpath('link'):
            liElement = outputDoc.xml_create_element(u'li', xhtml_ns)
            ulElement.xml_append(liElement)

            linkPath = str(link)
            ext = os.path.splitext(linkPath)[1]
            if ext in ('.html', '.shtml'):
                # Open the referenced page to use its title as link text.
                referencedDoc = amara.parse(
                    os.path.join(siteInputDir, linkPath))
                label = unicode(referencedDoc.html.head.title)
            else:
                label = unicode(os.path.basename(linkPath))

            aElement = outputDoc.xml_create_element(
                u'a', xhtml_ns,
                attributes={u'href': unicode(link)},
                content=label)
            # The anchor belongs inside its list item, not directly in <ul>.
            liElement.xml_append(aElement)

    outputBasePath = os.path.splitext(path)[0]
    writeOutputXml(outputBasePath + '.html', outputDoc)


def buildFeed(feedDoc, path):
    """Complete the RSS document ``feedDoc`` and write it to the output.

    For every ``item`` the referenced page is parsed, the link is made
    absolute, and ``guid``, ``title`` and (when the page has a
    ``div#intro`` directly under its body) ``description`` elements are
    appended. The channel's ``lastBuildDate`` is set to the current time.
    """
    print('Building feed %s' % path)
    # Set once per feed; the value does not depend on the individual items.
    feedDoc.rss.channel.lastBuildDate = unicode(
        time.strftime('%a, %d %b %Y %H:%M:%S PST'))

    for item in feedDoc.rss.channel.xml_xpath('item'):
        # Must be parsed before item.link is rewritten to an absolute URL.
        referencedDoc = amara.parse(
            os.path.join(siteInputDir, str(item.link)),
            prefixes=namespaces)

        item.link = u'http://thinkpond.org/' + unicode(item.link)
        item.xml_append(feedDoc.xml_create_element(
            u'guid', content=unicode(item.link)))
        item.xml_append(feedDoc.xml_create_element(
            u'title', content=unicode(referencedDoc.html.head.title)))

        introDivs = referencedDoc.html.body.xml_xpath(
            u'xhtml:div[@id="intro"]')
        if introDivs:
            description = feedDoc.xml_create_element(u'description')
            item.xml_append(description)
            description.xml_append(unicode(introDivs[0]))

    writeOutputXml(path, feedDoc)


processDir('input')