#! /usr/bin/env python
"""Dump the Project Euler problem statements into one plain-text file.

Each problem page is downloaded, the problem markup is cut out of the page,
rendered to text with the external ``elinks`` program and appended to the
output file.  Any ``.txt`` data file a page links to is stored in
``project_euler.zip``.

NOTE(review): this copy of the script had lost its line structure and the
markup inside several string literals (the container tags in ``prob_html``
and the image keys in ``textify``).  Those literals are flagged where they
occur and must be restored before the script can produce useful output.

The network code uses the Python 2 ``urllib.urlopen`` API.
"""

import contextlib
import re
import subprocess
import time
import urllib
import zipfile

try:
    from itertools import izip
except ImportError:
    # itertools.izip only exists on Python 2; where it is missing the
    # builtin zip is already lazy.
    izip = zip

# Written verbatim at the top of the generated text file.
disclaimer = (
    " Project Euler is protected under "
    "Attribution-Non-Commercial-Share Alike 2.0 UK: England & Wales "
)

# A problem URL is prefix + problem number + suffix.
prefix = "http://projecteuler.net/index.php?section=problems&id="
problems = range(1, 203)
suffix = ''

# Disabled: hard-coded list of data files.  save_euler() collects linked
# files through add_extra() instead.
#extras = ['http://projecteuler.net/project/' + i for i in [
#    'names.txt','words.txt', 'poker.txt', 'cipher1.txt',
#    'triangle.txt', 'keylog.txt', 'matrix.txt', 'roman.txt',
#    'sudoku.txt', 'base_exp.txt', 'triangles.txt', 'sets.txt',
#    'network.txt']]


def _fetch(url):
    """Download ``url`` and return the response body, closing the response."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def source_html():
    """Yield the full page source of every problem listed in ``problems``.

    The pages are downloaded lazily, one per step of the generator, in the
    order given by ``problems``.
    """
    for i in problems:
        yield _fetch('%s%i%s' % (prefix, i, suffix))


def tag_extract(text, start, end):
    """Return the span of ``text`` running from ``start`` to ``end``.

    The span begins at the leftmost ``start`` that is followed by an ``end``
    and stops at the nearest such ``end``; both delimiters are included.
    ``start`` and ``end`` are placed into a regular expression unescaped, so
    regex metacharacters in them keep their special meaning, and ``.`` does
    not match a newline, so the span cannot cross a line break.

    Raises:
        ValueError: if no such span exists in ``text``.
    """
    match = re.search('%s.*?%s' % (start, end), text)
    if match is None:
        raise ValueError('no text found between %r and %r' % (start, end))
    return match.group()


def prob_html(body):
    """Extract the relevant part of a full page source.

    Carriage returns and newlines are removed from ``body`` first so that the
    extraction in tag_extract() can span what were several source lines.

    Raises:
        ValueError: if the delimiters are not found (see tag_extract).
    """
    # NOTE(review): both delimiters contain nothing but newlines in this copy
    # of the file -- presumably HTML tags that were stripped; TODO restore.
    # Because newlines are removed from ``body`` below, these values can
    # never match, so this function currently always raises ValueError.
    open_tag = '\n'
    close_tag = '\n\n'
    body = body.replace('\r', '')
    body = body.replace('\n', '')
    return tag_extract(body, open_tag, close_tag)


def html_to_text(html):
    """Render an HTML fragment to plain text with ``elinks -dump``.

    The fragment is wrapped in double quotes, sent through ``echo`` into
    elinks, and surrounding whitespace and double quotes are stripped from
    the rendered text.  Requires ``echo`` and ``elinks`` on the PATH.
    """
    html_render = 'elinks -dump -force-html'
    p1 = subprocess.Popen(['echo', '"%s"' % html], stdout=subprocess.PIPE)
    p2 = subprocess.Popen(html_render.split(), stdin=p1.stdout,
                          stdout=subprocess.PIPE)
    # The renderer holds its own copy of the pipe; drop ours so that echo
    # receives SIGPIPE if the renderer exits early.
    p1.stdout.close()
    rendered = p2.communicate()[0]
    p1.wait()  # reap the echo child instead of leaving it to the GC
    return rendered.strip().strip('"')


def textify(s):
    """Replace commonly used images with plain-text equivalents.

    Returns ``s`` with every known image string replaced by its text.
    """
    # (image markup, replacement text) pairs.
    # NOTE(review): all three markup strings are empty in this copy of the
    # file -- presumably <img> tags that were stripped; TODO restore them.
    r = (
        ("", ' * '),
        ("", ' < '),
        ("", ' > '),
    )
    for img, text in r:
        if not img:
            # str.replace('', x) would insert x between every character.
            continue
        s = s.replace(img, text)
    return s


def pretty_heading(n):
    """Return the underlined 'Problem <n>' heading preceded by blank lines."""
    l1 = 'Problem %i' % n
    l2 = '=' * len(l1)
    return '\n\n\n' + l1 + '\n' + l2 + '\n'


def pretty_time():
    """Return the local time as 'year-month-day at hour:minute'.

    Minutes are zero-padded to two digits so 9:05 is not written as '9:5'.
    """
    return '%i-%i-%i at %i:%02i' % tuple(time.localtime()[:5])


# Disabled together with ``extras`` above.
#def zip_extras():
#    f = zipfile.ZipFile('project_euler.zip', 'w')
#    for name in extras:
#        info = zipfile.ZipInfo(name.split('/')[-1])
#        source = urllib.urlopen(name).read()
#        f.writestr(info, source)
#    f.close()


def extra_url(body):
    """Return the download URL of the first ``.txt`` path in ``body``.

    A path is a slash followed by word characters, dots, slashes or hyphens
    and ending in ``.txt``.  Restricting the characters keeps markup such as
    a closing ``</p>`` earlier on the same line out of the match.  Returns
    None when ``body`` contains no such path.
    """
    match = re.search(r'/\w[\w./-]*?\.txt', body)
    if match is None:
        return None
    return 'http://projecteuler.net/project' + match.group()


def add_extra(body, zfile):
    """Add the text file linked from ``body`` to the open archive ``zfile``.

    The file is downloaded and stored under its bare file name.  If ``body``
    contains no recognisable ``.txt`` path the archive is left untouched.

    Returns:
        ``zfile``, so that callers may rebind it.
    """
    name = extra_url(body)
    if name is None:
        return zfile
    info = zipfile.ZipInfo(name.split('/')[-1])
    zfile.writestr(info, _fetch(name))
    return zfile


def save_euler(path):
    """Write all problems as text to ``path`` and linked files to a zip.

    The text file starts with the disclaimer and a generation timestamp,
    followed by one headed section per problem.  Linked ``.txt`` files go to
    ``project_euler.zip`` in the current directory.  Progress is printed to
    standard output.  Both files are closed even if a download or the
    renderer fails part-way through.
    """
    with open(path, 'w') as f:
        archive = zipfile.ZipFile('project_euler.zip', 'w')
        with contextlib.closing(archive) as z:
            f.write(disclaimer + '\n\n')
            f.write('generated on ' + pretty_time() + '\n')
            for source, i in izip(source_html(), problems):
                print('problem %i' % i)
                if '.txt' in source:
                    add_extra(source, z)
                f.write(pretty_heading(i))
                f.write(html_to_text(prob_html(textify(source))))


if __name__ == '__main__':
    save_euler('project_euler.txt')
    #zip_extras()