7fde6b14b5cc3c79fff3a716d9ecc56d02f418e9
[Epoz.git] / Extensions / EpozPostTidy.py
1 ###
2 # EpozPostTidy.py
3 #
4 # This is just an example for transforming
5 # absolute urls to relative urls with Epoz.
6 #
7 # Use it at your own risk or improve it!
8 ###
9
10 from HTMLParser import HTMLParser
11 import re
12
# Block-level elements: the serializer emits a line break right after
# the closing tag of each of these, to keep the output readable.
blocktags = [
    # text containers
    'p', 'pre', 'div',
    # tables
    'table', 'tr', 'th', 'td', 'thead', 'tbody', 'tfoot',
    # lists
    'ul', 'ol', 'li',
    # headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
]
18
# A small HTML re-serializer: every parser event is written back to
# ``res`` as markup, rewriting link and image URLs on the way.
class aHTMLParser(HTMLParser):
    """Re-emit parsed HTML into ``res``, rewriting ``a``/``img`` URLs.

    Two instance attributes are read as soon as an ``<a href>`` or
    ``<img src>`` with a value is encountered, so the caller has to set
    them before feeding markup:

    * ``pageurl`` -- passed unchanged as first argument to the callback.
    * ``getRelativeUrl`` -- callable invoked as
      ``getRelativeUrl(pageurl, url)``; its return value replaces the
      original attribute value.

    The result is accumulated as a string in ``res``.
    """

    # Output markup accumulated so far.
    res = ""

    # Maps a tag name to the one attribute of that tag whose value is
    # handed to ``getRelativeUrl``.
    _url_attributes = {"a": "href", "img": "src"}

    def _quote_attr(self, value):
        """Return *value* escaped for use inside a double-quoted attribute.

        The parser hands over attribute values with entities already
        resolved, so they must be escaped again before being written out;
        otherwise a literal ``"`` would terminate the attribute early.
        """
        text = "%s" % (value,)
        # '&' has to be replaced first so the entities added below are
        # not escaped a second time.
        for char, entity in (("&", "&amp;"), ("<", "&lt;"),
                             (">", "&gt;"), ('"', "&quot;")):
            text = text.replace(char, entity)
        return text

    def _render_attributes(self, tag, attrs):
        """Serialize *attrs* (a list of ``(name, value)`` pairs) for *tag*.

        Returns a string in which every attribute is preceded by a single
        space, ready to be placed directly after the tag name.
        """
        url_attribute = self._url_attributes.get(tag)
        rendered = []
        for (key, value) in attrs:
            if value is None:
                # Valueless attribute such as ``<input disabled>``: keep
                # the bare name instead of writing ``disabled="None"``.
                rendered.append(' %s' % (key,))
                continue
            if key == url_attribute:
                value = self.getRelativeUrl(self.pageurl, value)
            rendered.append(' %s="%s"' % (key, self._quote_attr(value)))
        return "".join(rendered)

    def handle_starttag(self, tag, attrs):
        """Write an opening tag, rewriting its URL attribute if it has one."""
        self.res += "<%s%s>" % (tag, self._render_attributes(tag, attrs))

    def handle_endtag(self, tag):
        """Write a closing tag, followed by a newline for block elements."""
        self.res += "</%s>" % (tag,)
        # Some pretty-printing: tags listed in the module-level
        # ``blocktags`` get a line break after their closing tag.
        if tag in blocktags:
            self.res += "\n"

    def handle_startendtag(self, tag, attrs):
        """Write a self-closing tag (``<img ... />``) in XHTML style."""
        self.res += "<%s%s />" % (tag, self._render_attributes(tag, attrs))

    def handle_data(self, data):
        """Copy text content through unchanged."""
        self.res += data

    def handle_charref(self, data):
        """Re-emit a numeric character reference.

        *data* is the part between ``&#`` and ``;`` (e.g. ``"62"`` or
        ``"x3E"``), so the ``#`` has to be put back.
        """
        self.res += "&#%s;" % data

    def handle_entityref(self, data):
        """Re-emit a named entity reference such as ``&amp;``."""
        self.res += "&%s;" % data

    def handle_comment(self, data):
        """Re-emit a comment with its original content.

        No padding is added around *data*: the parser delivers the text
        between ``<!--`` and ``-->`` verbatim, including any surrounding
        spaces the author wrote.
        """
        self.res += "<!--%s-->" % data
58
59
def EpozPostTidy(self, html, pageurl):
    """Post-process *html*: make link/image URLs relative and tidy whitespace.

    Parameters:
      self    -- object providing ``EpozGetRelativeUrl``; it is handed to
                 the parser as the URL-rewriting callback.
                 NOTE(review): presumably the context object this script
                 is bound to -- confirm against the caller.
      html    -- the markup to process.
      pageurl -- base URL passed to ``EpozGetRelativeUrl`` for every
                 rewritten URL.

    Returns the re-serialized markup. Runs of spaces and runs of
    newlines are each collapsed to a single character, except inside
    ``<pre>`` elements, whose whitespace is significant and is left
    untouched.
    """
    # Set up the parser with the callback and base URL it needs for
    # calculating relative URLs.
    parser = aHTMLParser()
    parser.getRelativeUrl = self.EpozGetRelativeUrl
    parser.pageurl = pageurl

    # And now let's turn the wheels
    parser.feed(html)
    parser.close()

    # Split around complete <pre>...</pre> elements. Because the pattern
    # has a capturing group, the pieces alternate: even indexes are
    # ordinary markup, odd indexes are the preformatted elements.
    pieces = re.split(r"(?s)(<pre\b.*?</pre>)", parser.res)

    # Remove useless whitespace, but only outside preformatted text.
    for index in range(0, len(pieces), 2):
        cleaned = re.sub("[ ]+", " ", pieces[index])
        pieces[index] = re.sub("[\n]+", "\n", cleaned)

    return "".join(pieces)