Revision 7673 (by gradha, 2006/12/23 23:47:38) During my last update to have both stable/unstable docs I reversed the
check to create some data for embedded pages.
#!/usr/bin/env python
# vim: tabstop=3 softtabstop=3 shiftwidth=3 expandtab
"""
This script was written by Grzegorz Adam Hankiewicz and is Giftware: you
are free to do what you want with it without any restriction. I do not
accept responsibility for any effects, adverse or otherwise, that this
script may have on you, your computer, your sanity, your dog, and
anything else that you can think of. Use it at your own risk.
"""
import fnmatch
import getopt
import glob
import os
import re
import string
import sys


# Option tables handed to getopt.getopt() by process_arguments(). A trailing
# ":" (short form) or "=" (long form) marks an option that takes a value.
SHORT_ARGUMENTS = "hvs:d:f:l:a:A:R:"
LONG_ARGUMENTS = [
   "help",
   "version",
   "source=",
   "destination=",
   "filter=",
   "localize=",
   "api=",
   "stable-api=",
   "redirect=",
]
VERSION = "external_dep_helper.py 0.2"   # printed by -v/--version

# Run-time settings, filled in by process_arguments().
gSource = ""               # value of -s/--source
gDestination = ""          # value of -d/--destination
gApi = ""                  # value of -a/--api or -A/--stable-api
gDir_list = []             # language codes collected from -l/--localize
gInput = ""                # value of -f/--filter
gBase_dir = "latestdocs"   # switched to "stabledocs" by -A/--stable-api
gSuffix = ""               # switched to "_stable" by -A/--stable-api
gRedirect = ""             # value of -R/--redirect


def show_program_usage():
   """Print the command line help for the three modes of the script.

   The program name shown in the usage lines is the last path component
   of sys.argv[0]. Everything is written to standard output.
   """
   # Every print below takes exactly one parenthesised string, which Python 2
   # treats as a plain print statement and Python 3 as a function call, so
   # the output is the same on both.
   program = os.path.split (sys.argv[0])[1]

   print ("Dependency usage: %s -s src_path -d dest_path -l codes [-hv]\n" % program)
   print ("-h, --help            Shows this help message")
   print ("-v, --version         Displays version and exits")
   print ("-s x, --source=x      Use xxx as source directory")
   print ("-d x, --destination=x Use xxx as destination directory")
   print ("-l x, --localize=x    Use x language codes (ie. es:fr:de)")
   print ("")
   print ("Api dependency usage: %s -s src_path -a dest_path -l codes\n" % program)
   print ("-s x, --source=x      Use xxx as source directory")
   print ("-a x, --api=x         Use xxx as api destination directory")
   print ("-A x, --stable-api=x  Use xxx as stable api destination directory")
   print ("-l x, --localize=x    Use x language codes (ie. en:es)")
   print ("-R x, --redirect=x    Create redirection stubs under x/*")
   print ("")
   print ("Filter usage: %s -f file_to_filter [-hv]\n" % program)
   print ("-h, --help            Shows this help message")
   print ("-v, --version         Displays version and exits")
   print ("-f x, --filter=x      Filter xxx and strip some html")



def makedirs (complete, mask):
   """Replace of os.makedirs function, in hope to make this work with old python.

   Creates every missing directory along the path `complete`, using `mask`
   as the mode passed to os.mkdir. Backslashes in `complete` are treated as
   path separators. Each directory about to be created is announced on
   standard error. Directories which already exist are left alone.
   """
   assert complete
   complete = complete.replace ("\\", "/")

   # Splitting "/a/b" yields a leading empty component. Without seeding the
   # accumulator with the root, the first step would try to mkdir("") and
   # the remaining components would be created relative to the current
   # directory instead of the root.
   if complete.startswith ("/"):
      full = "/"
   else:
      full = ""

   for f in complete.split ("/"):
      if not f:
         # Empty pieces come from leading, trailing or doubled slashes.
         continue
      full = os.path.join (full, f)
      if not os.path.isdir (full):
         sys.stderr.write ("Trying to create '%s'\n" % full)
         os.mkdir (full, mask)



def process_arguments (args):
   """Parse the command line and store the result in the module globals.

   `args` is the argument list without the program name. Recognised
   options fill gSource, gDestination, gApi, gRedirect, gInput and
   gDir_list; --stable-api additionally switches gBase_dir and gSuffix to
   their stable values. --help and --version print their text and exit
   with status 0.

   The process exits with status 1 when the options cannot be parsed, or
   when neither a filter input nor a source plus a destination/api path
   was given. Redirect mode (-R) needs none of those and skips the check.
   """
   global gDestination, gSource, gInput, gDir_list, gApi, gBase_dir, gSuffix, gRedirect
   try:
      options = getopt.getopt (args, SHORT_ARGUMENTS, LONG_ARGUMENTS)[0]
   except getopt.error:
      # sys.exc_info() is used instead of "except X, e" / "except X as e"
      # so that this block parses on both old Python 2 and Python 3.
      print ("Error parsing arguments:\n%s \n" % (sys.exc_info()[1],))
      show_program_usage()
      sys.exit (1)

   for name, value in options:
      if name in ("--source", "-s"):
         gSource = value
      elif name in ("--destination", "-d"):
         gDestination = value
      elif name in ("--api", "-a"):
         gApi = value
      elif name in ("--redirect", "-R"):
         # Keep scanning: options following -R (like --help) must still be
         # honoured instead of being silently dropped.
         gRedirect = value
      elif name in ("--stable-api", "-A"):
         gApi = value
         gBase_dir = "stabledocs"
         gSuffix = "_stable"
      elif name in ("--version", "-v"):
         print (VERSION)
         sys.exit (0)
      elif name in ("--localize", "-l"):
         # Codes are separated by ":"; anything after a "," in a code is
         # discarded.
         for code in value.split (":"):
            if code:
               gDir_list.append (code.split (",", 1)[0])
      elif name in ("--filter", "-f"):
         gInput = value
      elif name in ("--help", "-h"):
         show_program_usage()
         sys.exit (0)

   # Redirect mode works on its own directory, no other path is required.
   if gRedirect:
      return

   # NOTE(review): the message mentions language codes but gDir_list is not
   # checked here; without -l the script ends up doing nothing. Confirm
   # whether that should be an error too.
   if not gInput and (not gSource or (not gDestination and not gApi)):
      print ("You must supply language codes and source and destination paths.")
      show_program_usage()
      # This is a usage error, report it through a failing exit status.
      sys.exit (1)



def transform_email_addresses (line, exp):
   """Mangle the email addresses found in `line` to avoid spam bots.

   `exp` is a compiled regular expression with a named group "mail" that
   captures one address. In every address "@" becomes " AT " and "."
   becomes " DOT ". All text around the addresses is returned unchanged,
   and a string without any address is returned as is.
   """
   pieces = []
   rest = line
   while 1:
      found = exp.search (rest)
      if not found:
         break
      # Slice around the position of the address instead of using the
      # pattern's own "pre"/"post" groups: those cannot cross a newline, so
      # text on other lines would be lost.
      start, end = found.span ("mail")
      if end == 0:
         # A match which consumes nothing would loop forever.
         break
      pieces.append (rest[:start])
      pieces.append (found.group ("mail").replace ("@", " AT ").replace (".", " DOT "))
      rest = rest[end:]
   pieces.append (rest)
   return "".join (pieces)


def filter_html_file (input):
   """Print the filtered body of an html file to standard output.

   `input` has the form "path:label", split at the first colon. First a
   "#-label" line is printed, then every line found after the line holding
   the opening body tag and before the line holding the closing one, with
   these changes applied:

   - h1 tags become h2 tags and h4 tags are dropped.
   - Quoted links to "name.html" are pointed to "<gBase_dir>/en/name.html"
     for every name._tx file listed in external/en; "allegro.html" is
     pointed to "<gBase_dir>/en/index.html".
   - The first number of the form N.N.N in a line is wrapped in <strong>.

   Raises IndexError when `input` contains no colon.
   """
   # NOTE(review): splitting at the first colon cannot cope with a path
   # holding a drive letter ("C:/tmp/x:label") -- confirm if that matters.
   pieces = input.split (":", 1)
   path, label = pieces[0], pieces[1]

   source = open (path, "rt")
   try:
      print ("#-%s" % label)

      start_reading = re.compile (r"<body>|<body ", re.IGNORECASE)
      end_reading = re.compile (r"</body>", re.IGNORECASE)
      color_number = re.compile (r"(?P<pre>.*?)(?P<self>\d+\.\d+\.\d+)(?P<post>.*)")

      # This matches email addresses quite nicely
#      exp = re.compile (r"(?P<pre>.*?)(?P<mail>[-\.\w]+@([-\w]+\.)+[-\w]+)(?P<post>.*)")
      simple_substitutions = [("<h1>", "<h2>"), ("</h1>", "</h2>"),
         ("<h4>", ""), ("</h4>", "")]
      for filename in os.listdir (os.path.join ("external", "en")):
         name, extension = os.path.splitext (filename)
         if extension == "._tx" and name != "allegro":
            simple_substitutions.append (('"%s.html"' % name,
               '"%s/en/%s.html"' % (gBase_dir, name)))
      simple_substitutions.append (('"allegro.html"', '"%s/en/index.html"' % gBase_dir))

      inside_body = False
      for line in source:
         line = line.rstrip ()
         if not inside_body:
            # The line carrying the opening tag itself is never printed.
            if start_reading.search (line):
               inside_body = True
            continue
         if end_reading.search (line):
            inside_body = False
            continue

         # process line which will be printed
         # if you uncomment the following line, email addresses will be mangled.
         # line = transform_email_addresses (line, exp)
         for old, new in simple_substitutions:
            line = line.replace (old, new)

         found = color_number.search (line)
         if found:
            line = "%s<strong>%s</strong>%s" % (found.group ("pre"),
               found.group ("self"), found.group ("post"))

         print (line)
   finally:
      # Release the handle even when something above fails.
      source.close ()



def generate_dependency_info():
   """Print Makefile rules for the pages integrated into the main website.

   For every language code in gDir_list and each of faq._tx, help._tx and
   thanks._tx which exists under gSource/<code>/, a rule is printed which
   builds gDestination/<file>.<code> through makedoc and the --filter
   mode of this script. Afterwards an automatic_cleanup_three target is
   printed which removes all those generated files.

   Requires at least one language code in gDir_list.
   """
   assert len (gDir_list) > 0

   cleanup = []

   # Generate simple langcode translations
   for langcode in gDir_list:
      for name in ("faq._tx", "help._tx", "thanks._tx"):
         s = os.path.join (gSource, langcode, name).replace ("\\", "/")
         d = os.path.join (gDestination, "%s.%s" % (name, langcode)).replace ("\\", "/")

         if not os.path.isfile (s):
            continue

         print ("\n%s: %s $(MAKEDOC_DEP) src/force_update" % (d, s))
         print ("\t$(MAKEDOC) -html $(EXTERNAL_TEMPFILE) $<")
         print ("\t$(PRE_FILE_GENERATION)")
         print ("\t$(EXTERNAL_DEP_HELPER) --filter=$(EXTERNAL_TEMPFILE):%s > $(STDOUTPUT)" % name)
         print ("\t$(POST_FILE_GENERATION)")
         print ("\t$(RM) $(DEP_INFO)")
         print ("EXTERNAL_DEPS += %s" % d)
         cleanup.append (d)

   # Generate small cleanup commands.
   print ("AUTOMATIC_CLEANUP_FILES += automatic_cleanup_three\n")
   print ("automatic_cleanup_three:")
   command = "\t$(RM)"
   width = 0
   for f in cleanup:
      # Start a new command once roughly 70 characters of file names are
      # gathered. The "width" test makes sure a command is never emitted
      # without arguments, even if a single path is longer than the limit.
      if width and width + len (f) > 70:
         print (command)
         command = "\t$(RM)"
         width = 0
      command = "%s %s" % (command, f)
      width = width + len (f)

   print (command)
   print ("")



def generate_api_info():
   """Print Makefile rules which build the API html pages from ._tx files.

   For every language code in gDir_list the ._tx files of
   gSource/<code>/ and gSource/<code>/build/ are collected, and a rule is
   printed to generate the matching .html under gApi/<code>/ (or
   gApi/<code>/build/). File names seen for previous languages are kept,
   so later languages get rules for them too; when the translated source
   is missing the English one is used instead. The output directories are
   created as a side effect.

   Then an automatic_cleanup_two<gSuffix> target is printed. For the
   non-stable build (empty gSuffix) additional rules are printed for the
   files listed by the project's mirror module which were not already
   generated above.

   Requires at least one language code in gDir_list.
   """
   assert len (gDir_list) > 0
   _join = os.path.join
   # rwxrwxr-x, spelled in a way which parses on both Python 2 and 3.
   dir_mode = int ("775", 8)

   def to_slashes (path):
      """Returns path using only forward slashes as separator."""
      return path.replace ("\\", "/")

   def html_name (tx_name):
      """Swaps the trailing ._tx extension for .html, nothing else."""
      if tx_name.endswith ("._tx"):
         return tx_name[:-len ("._tx")] + ".html"
      return tx_name

   def ensure_directory (path):
      """Creates path if needed, warning on stderr if that fails."""
      if not path or os.path.isdir (path):
         return
      try:
         os.makedirs (path, dir_mode)
      except OSError:
         # Stay best-effort like before, but don't hide real failures.
         if not os.path.isdir (path):
            sys.stderr.write ("Warning: could not create '%s': %s\n" %
               (path, sys.exc_info()[1]))

   def sorted_keys (table):
      """Returns the keys of the dictionary as a sorted list."""
      names = list (table.keys ())
      names.sort ()
      return names

   def print_generation_rule (dest, source):
      print ("\n%s: %s $(MAKEDOC_DEP) src/force_update"  % (dest, source))
      print ("\t$(PRE_FILE_GENERATION)")
      print ("\t$(MAKEDOC) -html $(STDOUTPUT) $<")
      print ("\t$(POST_FILE_GENERATION)")
      print ("\nEXTERNAL_DEPS += %s" % dest)

   generated_files = []

   def emit_rules (langcode, subdirs, names):
      """Prints one rule per file name, below the given subdirectories."""
      for name in sorted_keys (names):
         source = to_slashes (_join (gSource, langcode, *(subdirs + [name])))
         if not os.path.isfile (source): # English default fallback
            source = to_slashes (_join (gSource, "en", *(subdirs + [name])))
         dest = to_slashes (_join (gApi, langcode, *(subdirs + [html_name (name)])))

         print_generation_rule (dest, source)
         generated_files.append (dest)

   filelist = {}
   build_filelist = {}

   # Generate simple langcode translations
   for langcode in gDir_list:
      # Add sources' filelist
      for path in glob.glob (_join (gSource, langcode, "*._tx")):
         filelist[os.path.basename (path)] = 1
      for path in glob.glob (_join (gSource, langcode, "build", "*._tx")):
         build_filelist[os.path.basename (path)] = 1

      ensure_directory (_join (gApi, langcode, "build"))

      # Generate dependency for each file
      emit_rules (langcode, [], filelist)
      emit_rules (langcode, ["build"], build_filelist)

   # Generate automatic cleanup rules
   print ("\nEXTERNAL_DEPS += $(wildcard $(OUTDIR)%s/??/index*.html)\n" % gBase_dir)
   print ("AUTOMATIC_CLEANUP_FILES += automatic_cleanup_two%s\n" % gSuffix)
   print ("automatic_cleanup_two%s:" % gSuffix)
   for langcode in gDir_list:
      print ("\t$(RM) %s/*.html" % to_slashes (_join (gApi, langcode)))

   # Are we doing a stable build?
   if gSuffix:
      return

   # complete the api generation with non-mirrored files
   import mirror
   # NOTE(review): presumably entry[4] is the destination path and
   # entry[5][0] the mirroring kind -- confirm against mirror.py.
   # NOTE(review): generate_dependency_info() embeds faq, help and thanks,
   # while "changes" is skipped here instead of "thanks" -- confirm.
   pending = []
   for entry in mirror.load_mirror_data ("external/binary-files.txt", "out"):
      if entry[5][0] != "none":
         continue
      dest = entry[4]
      if dest in generated_files:
         continue
      if os.path.splitext (os.path.basename (dest))[0] in ("changes", "faq", "help"):
         continue
      pending.append (dest)

   # Real lists are used above because each one is walked twice below.
   for dest in pending:
      ensure_directory (os.path.dirname (dest))

   for dest in pending:
      langcode = os.path.basename (os.path.dirname (dest))
      new_name = "%s%s" % (os.path.splitext (os.path.basename (dest))[0], "._tx")
      print_generation_rule (dest, to_slashes (_join ("external", langcode, new_name)))


def replace_html(filename):
   """Overwrites filename with stub HTML.

   The stub links to "api.html", prefixed with one "../" for every
   directory level of filename below its first directory component. So
   "a/page.html" links to "api.html" and "a/b/page.html" links to
   "../api.html".
   """
   # Count levels on forward slashes only, like the rest of the script
   # which turns backslashes into slashes before handling paths.
   directory = os.path.dirname(filename).replace("\\", "/")
   jumps = max(len(directory.split("/")) - 1, 0)
   rel_path = "%s%s" % ("../" * jumps, "api.html")
   file_output = open(filename, "wt")
   try:
      file_output.write("""<html><head><title>We moved!</title></head><body>
<h1>We moved!</h1>
Please check <a href="%s">%s</a>.
</body></html>""" % (rel_path, rel_path))
   finally:
      # Don't leak the handle if the write fails.
      file_output.close()


def create_redirection_stubs(base_dir):
   """Turns every HTML file below base_dir into a "we moved" stub.

   Background: the documentation used to live in a single 'onlinedocs'
   tree tracking the wip branch. Now that the 4.2.x and 4.3.x branches
   are maintained in parallel, with noticeably different APIs, that tree
   is split into 'stabledocs' and 'latestdocs'. To keep old URLs working
   the old tree gets filled with stubs pointing to the new place.

   Only files which already exist and match *.html are overwritten, at
   any depth below base_dir. Nothing new is created, so a site without
   the old tree is left untouched.
   """
   for folder, _subdirs, names in os.walk(base_dir):
      for html_name in fnmatch.filter(names, "*.html"):
         replace_html(os.path.join(folder, html_name))


def main (args):
   """Parses args (program name first) and runs the selected mode.

   The first applicable mode wins, in this order: redirection stubs, html
   filtering, api dependency generation, website dependency generation.
   Without language codes neither dependency mode runs.
   """
   process_arguments (args[1:])

   if gRedirect:
      create_redirection_stubs (gRedirect)
   elif gInput:
      filter_html_file (gInput)
   elif not gDir_list:
      # No language codes, nothing to generate.
      pass
   elif gApi:
      generate_api_info ()
   elif not gSuffix:
      # Don't build dependency info for stable docs.
      generate_dependency_info ()



# Run main() with the full argument vector (program name included) only when
# the file is executed as a script, not when it is imported.
if __name__ == "__main__":
   main (sys.argv)