#! /usr/bin/env python
import urllib, re, subprocess, zipfile, time
from itertools import izip
# Licence notice written at the top of the generated text file.
disclaimer = (
    "\n"
    "Project Euler is protected under\n"
    "Attribution-Non-Commercial-Share Alike 2.0 UK: England & Wales\n"
)

# A problem page URL is built as: prefix + problem number + suffix.
prefix = "http://projecteuler.net/index.php?section=problems&id="
suffix = ''

# Problem numbers to download: 1 through 202 inclusive.
problems = range(1, 202 + 1)
#extras = ['http://projecteuler.net/project/' + i for i in [
# 'names.txt','words.txt', 'poker.txt', 'cipher1.txt',
# 'triangle.txt', 'keylog.txt', 'matrix.txt', 'roman.txt',
# 'sudoku.txt', 'base_exp.txt', 'triangles.txt', 'sets.txt',
# 'network.txt']]
def source_html():
    """Return a generator over the raw page source of every problem.

    Each URL is formed from the module-level ``prefix``, a number taken from
    ``problems`` and ``suffix``. A page is only downloaded when the generator
    is advanced to it.
    """
    return (
        urllib.urlopen('%s%i%s' % (prefix, number, suffix)).read()
        for number in problems
    )
def tag_extract(text, start, end):
    """Return the first, shortest span of ``text`` from ``start`` to ``end``.

    ``start`` and ``end`` are placed into a regular expression unescaped, so
    any regex metacharacters in them are interpreted. ``.`` does not match a
    newline in this pattern, so the span cannot cross a line break. When
    nothing matches, calling ``.group()`` on ``None`` raises AttributeError.
    """
    pattern = re.compile('%s.*?%s' % (start, end))
    found = pattern.search(text)
    return found.group()
def prob_html(body, open_tag='<div class="problem_content"[^>]*>',
              close_tag='</div>'):
    """Extract the relevant part of a full page source.

    Carriage returns and newlines are removed from ``body`` first, because
    the pattern used by ``tag_extract`` cannot match across line breaks. The
    first span running from ``open_tag`` to the nearest ``close_tag`` is then
    returned, tags included.

    ``open_tag`` and ``close_tag`` are handed to ``tag_extract`` as regular
    expression fragments.

    Raises AttributeError (from ``tag_extract``) when the tags are not found.
    """
    # NOTE(review): the original tag literals were lost from this copy of the
    # file (only empty, unterminated quotes remained). The defaults above are
    # a reconstruction of the problem container markup -- confirm against a
    # live page, or pass the correct tags explicitly.
    flattened = body.replace('\r', '').replace('\n', '')
    return tag_extract(flattened, open_tag, close_tag)
def html_to_text(html):
    """Render an HTML fragment to plain text using the external ``elinks``.

    The markup is written straight to the renderer's standard input. It is
    not passed through an ``echo`` child process, so it is not limited by the
    maximum command-line length, and no wrapper quotes have to be added and
    stripped again (which also removed genuine quotes at the edges of the
    text).

    Returns the renderer's output with surrounding whitespace stripped.
    Requires ``elinks`` on the PATH; ``subprocess.Popen`` raises OSError
    otherwise.
    """
    html_render = 'elinks -dump -force-html'
    renderer = subprocess.Popen(html_render.split(),
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    # communicate() writes the markup, closes stdin and waits for the
    # process, so no pipe is left open in the parent.
    rendered = renderer.communicate(html)[0]
    return rendered.strip()
def textify(s):
    """Replace commonly used symbol images with plain-text equivalents.

    Any ``<img ...>`` tag whose attributes mention one of the symbol image
    files below is replaced by the corresponding text; everything else in
    ``s`` is returned unchanged.
    """
    # NOTE(review): the original dictionary keys (literal <img> tags) were
    # lost from this copy of the file, leaving three identical broken keys.
    # The image names below are a reconstruction for the ' * ', ' < ' and
    # ' > ' replacements -- confirm against a live page.
    symbols = (
        ('times', ' * '),
        ('lt', ' < '),
        ('gt', ' > '),
    )
    for image, replacement in symbols:
        # [^>]* keeps each match inside a single tag.
        pattern = r'<img\b[^>]*symbol_%s\.gif[^>]*>' % image
        s = re.sub(pattern, replacement, s)
    return s
def pretty_heading(n):
    """Return an underlined ``Problem <n>`` heading preceded by three blank lines.

    The underline is a row of ``=`` exactly as long as the title.
    """
    title = 'Problem %i' % n
    underline = '=' * len(title)
    return ''.join(['\n\n\n', title, '\n', underline, '\n'])
def pretty_time(when=None):
    """Format a timestamp as ``YYYY-MM-DD at HH:MM``.

    ``when`` is a ``time.struct_time`` or any sequence whose first five items
    are year, month, day, hour and minute. It defaults to the current local
    time.

    Month, day, hour and minute are zero-padded to two digits, so 09:05 is
    no longer printed as ``9:5``.
    """
    if when is None:
        when = time.localtime()
    return '%i-%02i-%02i at %02i:%02i' % tuple(when[:5])
#def zip_extras():
# f = zipfile.ZipFile('project_euler.zip', 'w')
# for name in extras:
# info = zipfile.ZipInfo(name.split('/')[-1])
# source = urllib.urlopen(name).read()
# f.writestr(info, source)
# f.close()
def add_extra(body, zfile):
    """Download the first ``.txt`` file linked from ``body`` into ``zfile``.

    ``body`` is searched for the first ``/<name>....txt`` path on a single
    line. The file is fetched from ``http://projecteuler.net/project`` plus
    that path, and stored in the archive under the last path component.

    Returns ``zfile``, so the call can be used as ``z = add_extra(source, z)``.
    When ``body`` contains no such path, the archive is returned untouched
    and nothing is downloaded.
    """
    match = re.search(r'/\w.*?\.txt', body)
    if match is None:
        # The caller only checks for '.txt' anywhere in the page, which does
        # not guarantee a path-shaped match; skip instead of failing on None.
        return zfile
    url = 'http://projecteuler.net/project' + match.group()
    info = zipfile.ZipInfo(url.split('/')[-1])
    remote = urllib.urlopen(url)
    try:
        text = remote.read()
    finally:
        remote.close()
    zfile.writestr(info, text)
    return zfile
def save_euler(path, zip_path='project_euler.zip'):
    """Download every problem and write them to one text file.

    ``path`` receives the disclaimer, a generation timestamp and then, for
    each problem, a heading followed by the text rendering of the problem's
    HTML. Pages whose source mentions ``.txt`` have their linked file added
    to the zip archive at ``zip_path``.

    Both outputs are closed even when a download or parse step raises, so a
    failure part-way through does not leave an unflushed text file or a zip
    without its central directory.
    """
    f = open(path, 'w')
    try:
        z = zipfile.ZipFile(zip_path, 'w')
        try:
            f.write(disclaimer + '\n\n')
            f.write('generated on ' + pretty_time() + '\n')
            # izip keeps the downloads lazy: one page is fetched, written
            # out, and only then is the next one requested.
            for source, i in izip(source_html(), problems):
                print('problem %i' % i)
                if '.txt' in source:
                    z = add_extra(source, z)
                f.write(pretty_heading(i))
                f.write(html_to_text(prob_html(textify(source))))
        finally:
            z.close()
    finally:
        f.close()
if __name__ == '__main__':
    # Only scrape when run as a script; importing the module to reuse its
    # helpers should not start a full network download.
    save_euler('project_euler.txt')
#zip_extras()