# This is just an example for transforming
# absolute URLs to relative URLs with Epoz.
# Use it at your own risk or improve it!
import re

try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3
# Block-level elements: these tags will get a newline after the closing tag.
blocktags = (
    ['p', 'pre', 'div']
    + ['table', 'tr', 'th', 'td', 'thead', 'tbody', 'tfoot']
    + ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
)
# Just a simple htmlparser that writes back out what it reads
class aHTMLParser(HTMLParser):
    """Re-serialize the parsed HTML into ``self.res``, rewriting link URLs.

    Every handler appends the markup it was called for to the string
    ``self.res``.  While doing so, the ``href`` of ``<a>`` and the ``src``
    of ``<img>`` are replaced by the return value of
    ``self.getRelativeUrl(self.pageurl, value)``.

    The caller must assign two attributes on the instance before feeding:

    * ``pageurl`` -- passed as first argument to ``getRelativeUrl``.
    * ``getRelativeUrl`` -- callable taking ``(pageurl, url)``.
    """

    # Attribute holding the URL to rewrite, per tag, for "<tag ...>".
    _OPEN_TAG_URL_ATTRS = {"a": "href", "img": "src"}
    # Same for the self-closing form "<tag ... />"; only images are rewritten.
    _EMPTY_TAG_URL_ATTRS = {"img": "src"}

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)
        # Python 3's html.parser would otherwise decode character references
        # and route them through handle_data; keep them going to
        # handle_charref/handle_entityref so they are written back verbatim.
        # Python 2's HTMLParser does not read this attribute.
        self.convert_charrefs = False
        # Accumulated output markup.
        self.res = ""

    def _escape_attr(self, value):
        """Return *value* as text that is safe inside a double-quoted attribute.

        The parser hands attribute values over already unescaped, so they
        have to be escaped again before being written back.
        """
        text = "%s" % (value,)
        text = text.replace("&", "&amp;")  # must come first
        text = text.replace("<", "&lt;").replace(">", "&gt;")
        return text.replace('"', "&quot;")

    def _format_attrs(self, attrs, url_attr=None):
        """Serialize *attrs*, a list of ``(key, value)`` pairs.

        The value of the attribute named *url_attr* (if any) is passed
        through ``self.getRelativeUrl`` first.  Attributes without a value
        (``value is None``) are emitted as a bare name.
        """
        parts = []
        for key, value in attrs:
            if value is None:
                # e.g. <input disabled>; avoid emitting disabled="None"
                parts.append(" %s" % key)
                continue
            if key == url_attr:
                value = self.getRelativeUrl(self.pageurl, value)
            parts.append(' %s="%s"' % (key, self._escape_attr(value)))
        return "".join(parts)

    def handle_starttag(self, tag, attrs):
        """Append ``<tag ...>``, rewriting ``a/href`` and ``img/src``."""
        url_attr = self._OPEN_TAG_URL_ATTRS.get(tag)
        self.res += "<%s%s>" % (tag, self._format_attrs(attrs, url_attr))

    def handle_endtag(self, tag):
        """Append ``</tag>``, followed by a newline for block elements."""
        self.res += "</%s>" % (tag,)
        # Some pretty-nice-printing for block-elements
        if tag in blocktags:
            self.res += "\n"

    def handle_startendtag(self, tag, attrs):
        """Append ``<tag ... />``, rewriting ``img/src``."""
        url_attr = self._EMPTY_TAG_URL_ATTRS.get(tag)
        self.res += "<%s%s />" % (tag, self._format_attrs(attrs, url_attr))

    def handle_data(self, data):
        """Append text content unchanged."""
        self.res += data

    def handle_charref(self, data):
        """Append a numeric character reference such as ``&#160;``.

        *data* arrives without the leading ``&#``, so the ``#`` has to be
        put back.
        """
        self.res += "&#%s;" % data

    def handle_entityref(self, data):
        """Append a named entity reference such as ``&amp;``."""
        self.res += "&%s;" % data

    def handle_comment(self, data):
        """Append the comment *data* wrapped in ``<!-- ... -->``."""
        self.res += "<!-- %s -->" % data
def EpozPostTidy(self, html, pageurl):
    """Return *html* with link URLs made relative and whitespace collapsed.

    *self* must provide ``EpozGetRelativeUrl``; it is handed to the parser
    and called as ``EpozGetRelativeUrl(pageurl, url)`` for link URLs.
    *pageurl* is the base URL passed to that method.
    """
    # Create a parser
    parser = aHTMLParser()

    # Give the parser the global method for relative urls
    parser.getRelativeUrl = self.EpozGetRelativeUrl

    # Submit the pageurl as base-url for calculating urls
    parser.pageurl = pageurl

    # And now lets turn the wheels
    parser.feed(html)
    parser.close()  # flush anything the parser is still buffering

    # Get & return postprocessed html from parser
    html = parser.res

    # Just some cleanups to remove useless whitespace
    # NOTE(review): this also collapses whitespace inside <pre> -- confirm
    # that is acceptable.
    html = re.sub("[ ]+", " ", html)
    html = re.sub("[\n]+", "\n", html)

    return html