#!/usr/bin/python
import web
import feedparser
import shelve
import hashlib
import time
import re
import sys
# web.py routing table: the site root returns a usage/greeting page,
# every other path is dispatched to the ``test`` handler class, which
# interprets it as a feed URL plus a 2ch-style resource name.
urls = (
    '/', 'usage',
    '(.*)', 'test'
)
# Module-level web.py application; ``globals()`` lets web.py resolve the
# handler class names in ``urls`` above.
app = web.application(urls, globals())
class usage:
    """Handler for the site root: replies to GET / with a short greeting."""

    def GET(self):
        # Served verbatim as the response body.
        return '''
Hello, world!
'''
class test:
    """Serve an RSS/Atom feed as 2ch-style resources.

    A request path is interpreted as ``<feed-url>/<board>/dat/<epoch>.dat``
    or ``<feed-url>/<board>/subject.txt``; anything else gets a tiny index.
    Feed entries become threads: ``subject.txt`` lists them, a ``.dat``
    request renders one entry (plus any ``>>id`` cross-references) in the
    2ch dat format, encoded as Shift_JIS.

    NOTE(review): the ``<br>`` literals and the tag-stripping regex below
    were reconstructed from an HTML-mangled source (``<br>`` had been
    rendered as raw newlines, breaking the string literals) -- confirm
    against the upstream original.
    """

    def GET(self, path):
        # Maximum number of anchor-expansion passes per dat body.
        self.resmax = 3
        self.res = 0
        parts = path.strip("/").split("/")
        last = parts[-1]
        if re.search(r"\.dat$", last):
            # <url>/<board>/dat/<epoch>.dat -> feed URL is all but the
            # last three components.  forcedb=True: serve the cached feed
            # so the dat matches the subject.txt the client already saw.
            url = "/".join(parts[:-3])
            rss = self.get_rss(url, forcedb=True)
            return self.search_dat(rss, last)
        elif re.search(r"subject\.txt$", last):
            # <url>/<board>/subject.txt -> feed URL is all but the last two.
            url = "/".join(parts[:-2])
            rss = self.get_rss(url)
            return self.conv_subject(rss)
        else:
            # Minimal board index so 2ch clients can discover the resources.
            return "dat/\nsubject.txt\n"

    def get_rss(self, url, forcedb=False):
        """Return the parsed feed for *url*, via a 5-minute shelve cache.

        forcedb=True serves whatever is cached (refetching only if the URL
        has never been cached at all).
        """
        md5hash = hashlib.md5(url).hexdigest()
        db = shelve.open("feed.shelve")
        try:  # finally-close so the shelve handle never leaks on error
            try:
                if forcedb or (db["%s_mtime" % md5hash] + 60 * 5 > time.time()):
                    rss = db["%s_body" % md5hash]
                else:
                    rss = self.write_rss(url, md5hash, db)
            except KeyError:
                # Nothing cached for this URL yet.
                rss = self.write_rss(url, md5hash, db)
        finally:
            db.close()
        return rss

    def write_rss(self, url, md5hash, db):
        """Fetch and parse the feed, store it in *db*, and return it."""
        rss = feedparser.parse("http://" + url)
        db["%s_mtime" % md5hash] = time.time()
        db["%s_body" % md5hash] = rss
        return rss

    def tags_title(self, entry_tags):
        """Join a feedparser tag list into a comma-separated term string."""
        return ",".join(tag["term"] for tag in entry_tags)

    def conv_subject(self, rss):
        """Render the feed's entries as a 2ch subject.txt (Shift_JIS)."""
        lines = []
        res = "1"  # reply count shown to the client; one post per thread
        for entry in rss["entries"]:
            try:
                tags = self.tags_title(entry["tags"])
            except KeyError:
                tags = ""
            # The entry's updated time doubles as the thread (dat) name.
            stamp = int(time.mktime(entry["updated_parsed"]))
            lines.append("%s.dat<>[%s] %s (%s)\n"
                         % (stamp, tags, entry["title"], res))
        return self.utf8tosjis("".join(lines))

    def search_dat(self, rss, datfile):
        """Find the entry whose timestamp names *datfile* and render it."""
        epoch = int(re.sub(r"\.dat$", "", datfile))
        # Inverse of the mktime() used in conv_subject.
        target = time.localtime(epoch)
        for entry in rss["entries"]:
            if target == entry["updated_parsed"]:
                return self.conv_dat(entry, rss)
        return "not found."

    def conv_dat(self, entry, rss):
        """Render one feed entry as a single-line 2ch dat record."""
        name = "Anonymous"
        mail = ""
        date = time.strftime("%Y/%m/%d %H:%M:%S (%a)",
                             entry["updated_parsed"])
        # Last path component of the entry id is used as the post ID.
        res_id = entry["id"].split("/")[-1] + " "
        # Prefer full content; fall back to the summary (entries without
        # a "content" key no longer raise KeyError here).
        if entry.get("content"):
            body = self.untag(entry["content"][0]["value"])
        else:
            body = entry["summary"]
        # Expand ">>id" references; repeat so quotes inside quoted bodies
        # are resolved too, up to resmax levels deep.
        anchors = []
        for _ in range(self.resmax):
            (anchor, anchors) = self.search_anchor(body, rss, anchors)
            body = body + "<br>" + anchor
        try:
            tags = self.tags_title(entry["tags"])
        except KeyError:
            tags = ""
        title = entry["title"]
        dat = ("%s<>%s<>%s ID:%s<>%s<>[%s] %s\n"
               % (name, mail, date, res_id, body, tags, title))
        return self.utf8tosjis(dat)

    def untag(self, html, quote=None):
        """Strip HTML tags and flatten newlines to <br>.

        With *quote*, additionally prefix every resulting line with it
        (used to render quoted bodies, e.g. quote=" > ").
        """
        html = re.sub(r"<[^>]*/*>", "", html)
        html = re.sub(r"\n", "<br>", html)
        if quote is not None:
            html = re.sub(r"^", quote, html)
            html = re.sub(r"<br>", "<br>" + quote, html)
        return html

    def search_anchor(self, body, rss, anchors):
        """Resolve ">>xxxxxxxx" references in *body* to quoted entries.

        *anchors* accumulates IDs already quoted so repeated passes do not
        duplicate them.  Returns ("<br>"-joined quote block, anchors).
        """
        refs = []
        for chunk in body.split(">>")[1:]:
            m = re.search(r"^([0-9a-z]{8})", chunk)
            if m and m.group(1) not in refs:
                refs.append(m.group(1))
        quoted = []
        for entry in rss["entries"]:
            entry_id = entry["id"].split("/")[-1]
            if entry_id in refs and entry_id not in anchors:
                anchors.append(entry_id)
                quoted.append("%s wrote:" % entry_id)
                if entry.get("content"):
                    quoted.append(self.untag(entry["content"][0]["value"],
                                             quote=" > "))
                else:
                    quoted.append(" > " + entry["summary"])
        return ("<br>" + "<br>".join(quoted), anchors)

    def utf8tosjis(self, data):
        """Encode *data* as Shift_JIS for 2ch clients (Python 2 only)."""
        return unicode(data).encode("Shift_JIS", "replace")
if __name__ == '__main__':
    # Start web.py's built-in development HTTP server.
    app.run()