sed


# Usage: delete every newline (octal 012) from index.html so the whole document
# reaches sed as one single line, apply the rules in removeHTML.sed, and page
# through the result. tr reads the file via redirection; no extra cat process
# is needed.
tr -d '\012' < index.html | sed -f removeHTML.sed | less

# removeHTML.sed -- reflow HTML that arrives joined onto a single line so that
# titles, tables, table rows and paragraphs each start on their own line, then
# tidy the whitespace around tags.
# The rules run top to bottom on the same pattern space, so their order matters.
# "\n" in a replacement is relied on to insert a newline (GNU sed behaviour).

# Break the line in front of structural opening tags and behind the closing
# </table> and </tr> tags.
# NOTE: these are prefix matches: "<p" also matches <pre>, <param>, ... and
# "<tr" also matches <track>.
s/<title/\n<title/g
s/<table/\n<table/g
s/<\/table>/<\/table>\n/g
s/<tr/\n<tr/g
s/<\/tr>/<\/tr>\n/g
s/<p/\n<p/g
# Disabled: would also break the line in front of <br.
#s/<br/\n<br/g

# Collapse every run of one or more spaces into a single space.
s/\ \ */ /g
# NOTE(review): as written (six spaces) this can never match, because the rule
# above already removed every run of spaces. Possibly a literal tab was meant
# here -- confirm against the original script.
s/      / /g

# Drop spaces directly after ">" and directly before "<".
s/>\ */>/g
s/\ *</</g

# Remove opening <tr ...> tags that carry attributes (a bare <tr> is kept).
# [^>]* stops at the tag's own ">"; the former ".*" was greedy and, on the
# joined single-line input, deleted everything from the first "<tr " up to the
# last ">" in the whole document.
s/<tr [^>]*>//g

# Disabled experiments for unwrapping <td> cells and removing <table ...> tags.
# NOTE(review): they use the same greedy ".*" and the second one contains an
# unescaped "/" in "</td>", so they need the same kind of repair before being
# re-enabled.
#/\(<td\ .*>\)\(.*\)\(<\/td>\)/p
#s/\(<td\ .*>\)\(.*\)\(</td>\)/\2/g
#s/<table\ .*>//g