#!/usr/bin/env python
# -*- mode:Python; tab-width: 3 -*-
"""
This program retrieves DJGPP's ZIP picker web page and parses it
to obtain a list of download mirrors. It then updates the specified
template file with the new list, but only if the hash of the
retrieved data doesn't match the one previously stored there.
This script was written by Grzegorz Adam Hankiewicz and is Giftware:
you are free to do what you want with it without any restriction. I
do not accept responsibility for any effects, adverse or otherwise,
that this script may have on you, your computer, your sanity,
your dog, and anything else that you can think of. Use it at your
own risk.
$Id: update-mirror-list.py 7614 2006-12-02 18:13:42Z gradha $
"""
import getopt
import md5
import os
import re
import singleton
import sys
import urllib
# getopt specifications for the switches -h/--help, -v/--version and
# -f/--file; only the file switch expects a value.
short_arguments = "hvf:"
long_arguments = ["help", "version", "file="]
# Text printed by the -v/--version switch (the program name used to be
# misspelled as "update-mirro-list").
version = "update-mirror-list 0.2"
# HTML comments delimiting the automatically generated section of the
# updated file. The start tag is left open on purpose: the md5 hash of
# the mirror data and the closing " -->" follow it in the file.
start_comment_tag = r"<!-- begin automatically updated mirror section: "
end_comment_tag = r"<!-- end automatically updated mirror section -->"
# Web page from which the mirror list is extracted.
MAIN_URL = "http://www.delorie.com/djgpp/zip-picker.html"
# Matches one <option> line of that page. Groups: 1 = full URL,
# 2 = protocol (ftp or http), 3 = host name, 4 = rest of the line.
REGEXP = re.compile(r'<option value="((ftp|http)://([^/]+)/[^"]+)">(.+)')
def show_program_usage(argv_zero = "", exit_code = 0):
    """Print the command line help and terminate the program.

    argv_zero is expected to be the first entry of sys.argv; only its
    last path component is shown as the name of the script. exit_code
    is handed to sys.exit() once the help text has been written to
    standard output.
    """
    script_name = os.path.basename(argv_zero)
    help_lines = (
        "Usage: %s -f source_file_with_mirror_information [-hv]\n" % script_name,
        "-h, --help Shows this help message",
        "-v, --version Displays version and exits",
        "-f, --file=xxx The file to be updated with the mirror list",
        "",
    )
    # Every entry is terminated by a newline, so the usage line (which
    # already carries one) and the empty entry produce blank lines.
    for text in help_lines:
        sys.stdout.write(text + "\n")
    sys.exit(exit_code)
def process_arguments(arguments):
    """Parse the command line and return the name of the file to update.

    arguments is the complete argv list, script name included. The
    help and version switches, unknown options and a missing file name
    all end the program through sys.exit(); the function only returns
    when a file name was given with -f/--file.
    """
    try:
        parsed, _positional = getopt.getopt(arguments[1:],
            short_arguments, long_arguments)
    except getopt.error:
        msg = sys.exc_info()[1]
        # Emits exactly what the statement
        # `print "Error parsing arguments:\n", msg, "\n"` writes.
        sys.stdout.write("Error parsing arguments:\n%s \n\n" % (msg,))
        show_program_usage(arguments[0], 1)

    chosen_file = ""
    for option, value in parsed:
        if option in ("-f", "--file"):
            chosen_file = value
        elif option in ("-v", "--version"):
            print(version)
            sys.exit(0)
        elif option in ("-h", "--help"):
            show_program_usage(arguments[0], 0)
        else:
            print("Option '%s' not recognized\n" % option)
            show_program_usage(arguments[0], 1)

    if chosen_file:
        return chosen_file
    print("You need to specify the file where I'll update the mirror list")
    show_program_usage(arguments[0], 1)
    return chosen_file
def load_and_split_source(file_name):
    """load_and_split_source(file_name) -> hash, 3 line lists tuple

    Loads the specified file and searches its content for the marking
    comments which delimit the automatically generated text section.
    The tuple contains three line lists: header, body and footer. The
    header and footer are the static parts of the source file and they
    don't have to be modified. body is the bunch of lines which can be
    modified, and it doesn't include the lines which contain either
    the begin or end HTML comments. hash is the md5 hash retrieved from
    the comment which starts the body section.

    Raises ValueError when the begin and end comments are not both
    found (in that order) at the start of a line. Errors from opening
    or reading the file propagate unchanged; the file is closed in
    every case.
    """
    start_exp = re.compile(start_comment_tag + r"\s*(?P<hash>\w{32})")
    end_exp = re.compile(end_comment_tag)
    sections = ([], [], [])
    # Index into sections: 0 = header, 1 = body, 2 = footer.
    selected = 0
    found_hash = "x" * 32
    source = open(file_name, "rt")
    try:
        for line in source:
            start_match = None
            if selected == 0:
                # the begin comment is only looked for while in the header
                start_match = start_exp.match(line)
            if start_match:
                # The pattern only matches with 32 word characters, so
                # the hash group can never be empty here.
                found_hash = start_match.group("hash")
                selected = 1
            elif selected == 1 and end_exp.match(line):
                selected = 2
            else:
                # every other line belongs to the current section
                sections[selected].append(line)
    finally:
        source.close()
    if selected != 2:
        # A real exception class: string exceptions stopped working in
        # Python 2.6.
        raise ValueError(
            "Parsing error, didn't detect three sections in %s" % file_name)
    return found_hash, sections
def get_djgpp_mirrors():
    """f() -> [(url, site, description)]

    Opens MAIN_URL and collects one tuple for every line of the page
    that REGEXP recognises. url is the matched address with
    "v2tk/allegro/" appended, site is the protocol and the host name
    joined by a dot, and description is the text which follows the
    option tag on that line. The connection is closed before
    returning, also when reading fails.
    """
    page = urllib.urlopen(MAIN_URL)
    try:
        found = []
        # iter() with a sentinel calls readline() until it returns "".
        for text in iter(page.readline, ""):
            hit = REGEXP.match(text)
            if hit is None:
                continue
            address, protocol, host, description = hit.groups()
            found.append((address + "v2tk/allegro/",
                protocol + "." + host, description))
        return found
    finally:
        page.close()
def calculate_md5(data):
    """f([(a, b, c), ...]) -> string with md5 hash

    Given a list of tuples with three elements, concatenates the text
    form of every element, in order and without separators, and
    returns the hexadecimal md5 digest of the resulting string.
    Unicode text is encoded as UTF-8 before hashing; byte strings are
    hashed as they are.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python older than 2.5 only ships the md5 module, which later
        # versions deprecate and Python 3 removes.
        from md5 import new as new_md5
    text = "".join(["%s%s%s" % triplet for triplet in data])
    if isinstance(text, type(u"")):
        # hashlib only digests bytes, so encode explicitly
        text = text.encode("utf-8")
    return new_md5(text).hexdigest()
def update_source_file(file_name, hash, mirrors, header, footer):
    """Updates file_name with the given data.

    Rewrites file_name with, in this order: the header lines, the begin
    comment carrying hash, the lines generate_html_code() returns for
    mirrors, the end comment and the footer lines. header and footer
    are sequences of strings written verbatim, so they are expected to
    carry their own line endings.

    Errors from opening or writing the file propagate to the caller;
    the file is closed in every case.
    """
    # Generate the body first: opening the file for writing truncates
    # it, so nothing that can fail should come between open and write.
    body = generate_html_code(mirrors)
    output = open(file_name, "wt")
    try:
        output.writelines(header)
        output.write("%s%s -->\n" % (start_comment_tag, hash))
        output.writelines(body)
        output.write("%s\n" % end_comment_tag)
        output.writelines(footer)
    finally:
        # do not leak the handle when one of the writes raises
        output.close()
def generate_html_code(mirrors):
    """generate_html_code([(a, b, c), ...]) -> list of strings

    Given a list of tuples in the form (url, site, description),
    returns the list of newline terminated strings which make up the
    generated chunk:

    - one "#!- <site> description" line per mirror (the defines/macros),
    - the include-data line,
    - a table with one row per mirror linking to url with <site> as
      the link text; rows alternate between class "row1" and "row2".

    The values are inserted as they are, nothing is escaped. An empty
    list yields just the include-data line and an empty table.
    """
    row_classes = ('class="row1"', 'class="row2"')
    lines = ["#!- <%s> %s\n" % (site, description)
        for dummy, site, description in mirrors]
    lines.append("<include-data=simtel/simtel:replace_default_values>\n")
    lines.append("<table width=\"100%\">\n")
    for index, (url, site, dummy) in enumerate(mirrors):
        # even rows use the first class, odd rows the second one
        lines.append('<tr %s><td><a href="%s"><%s></a><br></td></tr>\n' %
            (row_classes[index % 2], url, site))
    lines.append("</table>\n")
    return lines
def main(argv = None):
    """Entry point of the script.

    argv defaults to sys.argv. The file named on the command line is
    split into its sections, the current mirror list is downloaded and
    the file is rewritten only when the md5 hash of the mirror data
    differs from the one stored in the file.
    """
    arguments = argv
    if arguments is None:
        arguments = sys.argv
    file_name = process_arguments(arguments)
    old_md5, section = load_and_split_source(file_name)
    mirrors = get_djgpp_mirrors()
    new_md5 = calculate_md5(mirrors)
    if old_md5 == new_md5:
        # stored hash still matches, leave the file untouched
        return
    print("Updating")
    header, footer = section[0], section[2]
    update_source_file(file_name, new_md5, mirrors, header, footer)
if __name__ == "__main__":
    # Protect the entry point with a file lock: main is handed to
    # singleton.run_if_possible() instead of being called directly.
    singleton.run_if_possible(main)