もろもろハッカーニュースアンケート

プログラミングはじめて何年?


Poll: How long have you been programming?


http://news.ycombinator.com/item?id=3786926



Less than 6 months (22)



1 year (29)



2 years (48)



3-5 years (180)



7 years (163)



10+ years (500)



20+ years (202)



30+ years (76)



50+ years (1)


メインOSは?

最近アンケート多すぎない?


[dead]


None



yes (500)



no (227)


ハッカーニュースアンケートグラフ生成プログラム。Unixパイプに限界を感じたのでズルしてpythonで書いた。あとはこれをwebでホスティングすれば、完全自動化できる。


#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys,os
import re
import time
import lxml.html
import urllib2
import urllib
import jinja2

# Default jinja2 template: a linked poll title followed by one horizontal
# bar per answer.  Each vote pair is (label, width); the width in pixels is
# the score normalized by normalized_votes(), so the top answer spans
# max_width pixels and the rest scale proportionally.
default_template="""
<div>
<a href="{{original_url}}"><h3>{{title}}</h3></a>
<p>
<a href="{{original_url}}">{{original_url}}</a>
</p>

{% for vote in votes %}
<span>{{vote.0}} ({{vote.1}})</span>
<div style="width:{{vote.1}}px; height: 1em; background:#cceecc; border: 1px solid #000; margin-bottom: 1em;" >

</div>
{% endfor %}
</div>
"""

def normalized_votes(trs, max_width=1000):
    """Parse vote rows and rescale scores for display.

    Scores are scaled so the highest-voted answer maps to ``max_width``
    (pixels) and every other answer is proportional to it.

    :param trs: iterator over poll table-row elements (see ``votes``).
    :param max_width: bar width, in pixels, of the top answer.
    :returns: list of (label, scaled_score) pairs.
    """
    pairs = list(votes(trs))
    top = max(score for _, score in pairs)
    return [(label, max_width * score / top) for label, score in pairs]

def votes(trs):
    """Yield (label, score) pairs parsed from a stream of poll rows.

    Hacker News renders each poll option as three consecutive table rows:
    the answer text, the point count ("NNN points"), and a spacer.  An
    incomplete trailing group is silently ignored, matching the site's
    markup quirks.

    :param trs: iterator of row elements, each exposing ``text_content()``
        (the lxml element API — assumed, confirm against callers).
    :raises RuntimeError: when a point-count row cannot be parsed.
    """
    while True:
        try:
            # Consume one option group: answer row, score row, spacer row.
            # next() builtin instead of the Python-2-only .next() method,
            # so this works on both Python 2.6+ and Python 3.
            item = next(trs)
            comment = next(trs)
            next(trs)  # spacer row, discarded
        except StopIteration:
            break

        m = re.match(r'^(\d+)\s*\w+', comment.text_content())
        if not m:
            raise RuntimeError("failed to parse vote: " + str(item))

        yield (item.text_content(), int(m.group(1)))

def parse(html_str, **opt):
    """Parse an HN poll page into a dict ready for template rendering.

    :param html_str: full HTML of the poll page.
    :param opt: passed through to ``normalized_votes`` (e.g. max_width).
    :returns: dict with keys ``votes`` (normalized (label, width) pairs),
        ``title`` and ``original_url`` (None when the title has no link).
    """
    root = lxml.html.fromstring(html_str)
    vote_table = root.find('./body/center/table/tr/td/table/tr/td/table')
    title_cell = root.xpath('.//td[@class="title"]')[0]
    link = title_cell.find('.//a')
    if link is None:
        url = None
    else:
        url = 'http://news.ycombinator.com/' + link.get('href')
    return dict(
        votes=normalized_votes(iter(vote_table), **opt),
        title=title_cell.text_content(),
        original_url=url,
    )

def resolve_template(template=None):
    """Resolve *template* to a compiled jinja2 template object.

    The argument may be:
      * None             -- use the built-in ``default_template``;
      * an existing path -- read the template source from that file (UTF-8);
      * anything else    -- treat the string itself as template source.

    :returns: a ``jinja2.Template``.
    """
    if template is None:
        template_str = default_template
    elif os.path.exists(template):
        # open() instead of the Python-2-only file() builtin, and a
        # with-block so the handle is closed deterministically.
        with open(template) as f:
            template_str = f.read().decode('utf8')
    else:
        template_str = template
    return jinja2.Environment().from_string(template_str)

def fetch(url):
    """Resolve a URL or a local HTML file path to an HTML string.

    Remote pages are cached under /var/tmp for an hour so repeated runs
    don't hammer (and get this tool banned from) Hacker News.

    :param url: an http(s) URL, or a path to a previously saved HTML file.
    :returns: the page content decoded as UTF-8.
    """
    # Accept https:// as well as http://; anything else is a local file.
    if not url.lower().startswith(('http://', 'https://')):
        with open(url) as f:
            return f.read().decode('utf8')

    cache_file = os.path.join(
        '/var/tmp/', 'hn-poll-' + urllib.quote(url).replace('/', '-'))

    if os.path.exists(cache_file) and time.time() - os.stat(cache_file).st_mtime <= 3600:
        # Cache entry is fresh (under an hour old): reuse it.
        sys.stderr.write('cache-hit: ' + cache_file + '\n')
        with open(cache_file) as f:
            html = f.read()
    else:
        sys.stderr.write('fetching: ' + url + '\n')
        html = urllib2.urlopen(url).read()
        # Write to a temp file then rename, so a partial download never
        # poisons the cache (rename is atomic on POSIX filesystems).
        tmp_file = cache_file + '.tmp'
        with open(tmp_file, 'w') as f:
            f.write(html)
        os.rename(tmp_file, cache_file)

    return html.decode('utf8')

if __name__ == '__main__':

    import baker
    import json

    @baker.command
    def as_json(html_file, indent=None):
        """dumps (item,vote) pairs as json"""
        # Single-argument print(x) is valid on both Python 2 and 3;
        # open() replaces the Python-2-only file() builtin.
        print(json.dumps(parse(open(html_file).read()),
                         indent=indent if indent is None else int(indent)))

    @baker.command
    def as_html(hackernews_poll_url, template=None, max_width=1000):
        """render hackernews poll url into html graph.
        """
        html = fetch(hackernews_poll_url)
        # Bind the compiled template to a new name instead of rebinding
        # the string parameter to a different type.
        tmpl = resolve_template(template)
        print(tmpl.render(**parse(html, max_width=int(max_width))))

    baker.run()