#!/usr/bin/env python
# -*- mode:Python; tab-width: 4 -*-
"""
Analyses source code from the src directory and translations under
data. Then gathers if there are stale English entries (unused data)
and missing translations (untranslated but used data). It can also
gather if there are any obsolete entries (translated but unused
data).
Everything is done statically and output is generated on stdout,
so no locking is required. Run without arguments to get help.
"""
import getopt
import glob
import os
import re
import sys
# Matches one `<include-data...=FILENAME>` tag inside a line of text.
#   cond     - whatever sits between `<include-data` and the `=` sign.
#   filename - the non-whitespace value after `=`, up to the first `>`.
#   post     - the remainder of the line after the tag, so callers can
#              search it again for further tags.
INCLUDE_DATA_RE = re.compile(r"<include-data(?P<cond>[^=]*)=(?P<filename>\S+?)>(?P<post>.*)")
def show_program_usage(exit_code=0):
    """f(exit_code) -> no return.

    Writes the command line help to stdout, filling in the script
    name from sys.argv[0], and then terminates the process through
    sys.exit() using exit_code as the exit status.
    """
    script_name = sys.argv[0]
    usage_text = """Usage: %s -d checkout_dir
-h, --help Shows this help message.
-v, --version Displays version and exits.
-d x, --dir=x Where `data' and `src' are located.
-m, --master Shows stale entries in the master language.
-o, --obsolete Shows also obsolete entries.
-e, --exclude=x List of colon separated languages to exclude.
-i, --include=x On top of not being excluded, a language must be included.
Example:
%s -d . -m -e ko:pl
""" % (script_name, script_name)
    print(usage_text)
    sys.exit(exit_code)
def process_arguments(arguments):
    """f([list of arguments]) -> (options)

    Processes the list of command line arguments and returns the
    active options as the tuple (directory, show_stale_master,
    show_obsolete, exclude, include). The two flags are 0 or 1;
    exclude and include are dictionaries keyed by language code
    (value always 1) so that membership can be tested quickly.

    Does not return when the arguments cannot be parsed or when no
    directory was given (show_program_usage(1) is called), nor for
    -h/--help and -v/--version, which exit with status zero.
    """
    try:
        # getopt also hands back the positional arguments; they are
        # not used by this program.
        options, _positional = getopt.getopt(
            arguments, "hvd:moe:i:",
            ["help", "version", "dir=", "master", "obsolete",
             "exclude=", "include="])
    except getopt.error as msg:
        print("Error parsing arguments: %s" % msg)
        show_program_usage(1)

    directory = None
    show_stale_master = 0
    show_obsolete = 0
    exclude = []
    include = []

    for op, value in options:
        if op in ("--dir", "-d"):
            directory = value
        elif op in ("--version", "-v"):
            # NOTE(review): no module-level `version` is defined in the
            # visible source, so a plain `print version` raised
            # NameError. Look it up defensively until one is added.
            print(globals().get("version", "unknown"))
            sys.exit(0)
        elif op in ("--help", "-h"):
            show_program_usage(0)
        elif op in ("--master", "-m"):
            show_stale_master = 1
        elif op in ("--obsolete", "-o"):
            show_obsolete = 1
        elif op in ("--exclude", "-e"):
            exclude = value.split(":")
        elif op in ("--include", "-i"):
            include = value.split(":")

    if not directory:
        print("You must specify the root directory.")
        show_program_usage(1)

    # Transform exclusion/inclusion list into dictionary.
    exclude = dict.fromkeys(exclude, 1)
    include = dict.fromkeys(include, 1)
    return directory, show_stale_master, show_obsolete, exclude, include
def read_file_data_entries(filename):
    """f(filename) -> {data_entries}

    Returns all the include-data entries from filename. Each key is
    the tag's filename parameter passed through os.path.normpath();
    the value is always zero, a dictionary is used to avoid key
    repetition.

    The file is opened with a `with` block so that the handle is
    closed even if reading fails half way through.
    """
    entries = {}
    with open(filename, "rt") as source:
        for line in source:
            # A line may hold several tags. The regex returns the text
            # following the first tag as `post`, so keep searching that
            # remainder until nothing matches any more.
            while line:
                m = INCLUDE_DATA_RE.search(line)
                if not m:
                    break
                entries[os.path.normpath(m.group("filename"))] = 0
                line = m.group("post")
    return entries
def read_src_dir(base_dir):
    """f(base_dir) -> {used_data_entries}

    Scans every regular file matched by base_dir/src/* and merges
    the dictionaries returned by read_file_data_entries() for each
    of them. The values are always zero; the dictionary only serves
    to keep each include-data parameter once.
    """
    used = {}
    for candidate in glob.glob(os.path.join(base_dir, "src", "*")):
        # The glob also matches directories; only plain files are read.
        if not os.path.isfile(candidate):
            continue
        used.update(read_file_data_entries(candidate))
    return used
def read_data_file(data_dir, filename):
    """f(data_dir, filename) -> (langcode, [list_of_entries])

    Reads filename and extracts from it the language code, which is
    taken from the last two characters of the file name. Also
    returned, a list of data entries from the file: one
    "pattern:label" string for every line starting with "#-", where
    label is the rest of that line without trailing whitespace.

    The pattern is filename with the leading data_dir plus one
    separator character and the trailing three characters (dot and
    language code) sliced off, so filename is expected to start
    with data_dir.
    """
    langcode = filename[-2:]
    pattern = filename[len(data_dir) + 1:-3]
    # The `with` block closes the handle even if reading raises.
    with open(filename, "rt") as source:
        entries = ["%s:%s" % (pattern, line[2:].rstrip())
                   for line in source if line.startswith("#-")]
    return langcode, entries
def read_data_dir(base_dir):
    """f(base_dir) -> {data}

    Reads all the regular files matching base_dir/data/*/*.?? and
    returns a dictionary keyed by language code. Each value is the
    list of entries read_data_file() reported for that language,
    accumulated over all of the language's files.
    """
    data_dir = os.path.join(base_dir, "data")
    languages = {}
    for path in glob.glob(os.path.join(data_dir, "*", "*.??")):
        # The glob can match directories too; only plain files count.
        if not os.path.isfile(path):
            continue
        lang, entries = read_data_file(data_dir, path)
        # setdefault replaces the deprecated has_key() check.
        languages.setdefault(lang, []).extend(entries)
    return languages
def main():
    """f() -> int

    Main entry point. Returns zero on success.

    Parses sys.argv, collects the entries referenced from the src
    directory and the entries provided by every language under
    data, and prints the requested reports to stdout. Languages
    are reported in sorted order so the output is reproducible.
    """
    directory, show_stale_master, show_obsolete, exclude, include = (
        process_arguments(sys.argv[1:]))

    # Dict for quick searching, list for sorted output.
    master_dict = read_src_dir(directory)
    master_list = sorted(master_dict)
    data = read_data_dir(directory)

    # Report the entries referenced by the sources which the English
    # data does not provide.
    if show_stale_master:
        # A checkout without any English data file must not crash, so
        # fall back to an empty list instead of indexing data["en"].
        en_entries = set(data.get("en", []))
        for element in master_list:
            if element not in en_entries:
                print("Stale English %s" % element)

    # Finally, process all the languages.
    for lang, list_data in sorted(data.items()):
        # Skip uninteresting languages.
        if lang in exclude:
            continue
        if include and lang not in include:
            continue

        if lang != "en":
            # Set for quick membership tests.
            available = set(list_data)
            # Create list of elements which are missing.
            missing = ["%s:%s" % (lang, element)
                       for element in master_list
                       if element not in available]
            # Show results. master_list cannot be empty when missing
            # is not, so the division below is safe.
            if missing:
                missing_percent = len(missing) / float(len(master_list)) * 100
                print("%d blocks missing out of %d (%.02f%%) for `%s':" % (
                    len(missing), len(master_list), missing_percent, lang))
                for element in missing:
                    print(" %s" % element)
                print("")
            else:
                print("No missing entries for `%s'.\n" % lang)

        # Show obsolete entries.
        # NOTE(review): this report runs for every language, English
        # included; confirm that matches the intended nesting.
        if show_obsolete:
            # sorted() leaves the caller's list untouched.
            obsolete = ["%s:%s" % (lang, element)
                        for element in sorted(list_data)
                        if element not in master_dict]
            if obsolete:
                print("%d entries obsolete in `%s':" % (len(obsolete), lang))
                for element in obsolete:
                    print(" %s" % element)
                print("")
            else:
                print("No obsolete entries in `%s'.\n" % lang)

    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the exit status
    # instead of silently discarding it.
    sys.exit(main())