もろもろハッカーニュースアンケート
プログラミングはじめて何年?
Poll: How long have you been programming?
http://news.ycombinator.com/item?id=3786926
Less than 6 months (22)
1 year (29)
2 years (48)
3-5 years (180)
7 years (163)
10+ years (500)
20+ years (202)
30+ years (76)
50+ years (1)
メインOSは?
Poll: What is your primary operating system
http://news.ycombinator.com/item?id=3786674
OSX (500)
Windows (262)
Linux (404)
Other Unix variant (9)
iOS (15)
Android (14)
Chrome OS (3)
Other (2)
最近アンケート多すぎない?
ハッカーニュースアンケートグラフ生成プログラム。Unixパイプに限界を感じたのでズルしてpythonで書いた。あとはこれをwebでホスティングすれば、完全自動化できる。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys,os
import re
import time
import lxml.html
import urllib2
import urllib
import jinja2

# Default Jinja2 template: shows the poll title linked back to the original
# HN item, then one horizontal bar per option.  Each bar's pixel width is the
# option's normalized score (see normalized_votes).
default_template = """
<div>
<a href="{{original_url}}"><h3>{{title}}</h3></a>
<p>
<a href="{{original_url}}">{{original_url}}</a>
</p>
{% for vote in votes %}
<span>{{vote.0}} ({{vote.1}})</span>
<div style="width:{{vote.1}}px; height: 1em; background:#cceecc; border: 1px solid #000; margin-bottom: 1em;" >
</div>
{% endfor %}
</div>
"""


def normalized_votes(trs, max_width=1000):
    """Scale vote scores so that the top score maps to max_width pixels.

    :param trs: iterator of vote-row elements, consumed by votes().
    :param max_width: bar width, in pixels, for the highest-scoring option.
    :return: list of (label, scaled_score) pairs.
    :raises ValueError: if trs yields no complete vote triple (max of []).
    """
    vts = list(votes(trs))
    max_score = max([pair[1] for pair in vts])
    # Floor division keeps widths integral; identical to Python 2's
    # int/int "/" that the original relied on.
    return [(label, max_width * score // max_score) for label, score in vts]


def votes(trs):
    """vote row elements to sequence of pairs (label, score)

    HN poll markup lays out each option as three table rows: the option
    label, the "<n> points" line, and a spacer.  Consume the iterator in
    triples until it is exhausted.
    """
    while True:
        try:
            item = next(trs)
            comment = next(trs)
            spacer = next(trs)  # discarded; only advances the iterator
        except StopIteration:
            break
        m = re.match(r'^(\d+)\s*\w+', comment.text_content())
        if not m:
            raise RuntimeError("failed to parse vote: " + str(item))
        score = int(m.group(1))
        yield (item.text_content(), score)
def parse(html_str, **opt):
    """ html_string --> page data dict to be rendered.

    Extracts the poll title, a link back to the HN item, and the
    normalized vote bars.  Keyword options (e.g. max_width) are passed
    through to normalized_votes.
    """
    lx = lxml.html.fromstring(html_str)
    # The vote rows live in the third nested table of HN's page layout.
    votes_table = lx.find('./body/center/table/tr/td/table/tr/td/table')
    title_td = lx.xpath('.//td[@class="title"]')[0]
    title_link = title_td.find('.//a')
    return dict(votes=normalized_votes(iter(votes_table), **opt),
                title=title_td.text_content(),
                original_url=None if title_link is None
                else 'http://news.ycombinator.com/' + title_link.get('href'),
                )


def resolve_template(template=None):
    """Resolve the template option to a compiled jinja2 template.

    Accepts None (use default_template), a path to an existing template
    file (read as UTF-8), or a literal template string.
    """
    # tdir,tname=os.path.split(template)
    # env=jinja2.Environment(loader=jinja2.FileSystemLoader(tdir))
    # return env.get_template(tname)
    if template is None:
        template_str = default_template
    elif os.path.exists(template):
        template_str = file(template).read().decode('utf8')
    else:
        template_str = template
    return jinja2.Environment().from_string(template_str)


def fetch(url):
    """Resolve url|or-html-file to html string.
    A simple caching is implemented so that you dont get banned from hn for repeated use.
    """
    cache_file = os.path.join('/var/tmp/',
                              'hn-poll-' + urllib.quote(url).replace('/', '-'))
    # Also accept https:// so current (TLS-only) HN URLs work.
    if not url.lower().startswith(('http://', 'https://')):
        # accept local file
        html_file = url
        return file(html_file).read().decode('utf8')
    elif os.path.exists(cache_file) and time.time() - os.stat(cache_file).st_mtime <= 3600:
        # cache hit: reuse a copy fetched within the last hour
        print >>sys.stderr, 'cache-hit:', cache_file
        html = file(cache_file).read()
    else:
        # fetch
        print >>sys.stderr, 'fetching:', url
        html = urllib2.urlopen(url).read()
        # write-then-rename so a partial download never poisons the cache
        tmp_file = cache_file + '.tmp'
        file(tmp_file, 'w').write(html)
        os.rename(tmp_file, cache_file)
    return html.decode('utf8')
if __name__=='__main__':
import baker
import json@baker.command
def as_json(html_file, indent=None):
"""dumps (item,vote) pairs as json"""print json.dumps(parse(file(html_file).read()),
indent=indent if indent is None else int(indent))@baker.command
def as_html(hackernews_poll_url, template=None, max_width=1000):
"""render hackernews poll url into html graph.
"""
html=fetch(hackernews_poll_url)
template=resolve_template(template)
print template.render(**parse(html, max_width=int(max_width)))baker.run()