Sign.py :  » Build » A-A-P » aap-1.091 » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Build » A A P 
A A P » aap 1.091 » Sign.py
# Part of the A-A-P recipe executive: Store signatures

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles remembering signatures of targets and sources.
#

import os
import os.path
import string

# md5 is deprecated in Python 2.6, avoid a warning.
try:
    import hashlib
except ImportError:
    import md5
    class hashlib:
        md5 = md5.new

import time

from Util import *
from Message import *
from Filetype import ft_detect
import Global

# Both "signatures" dictionaries are indexed by the name of the target Node
# (file or directory).
# For non-virtual nodes the absolute name is used.
# Each entry is a dictionary indexed by the source-name@check-name and has a
# string value.
# The "@buildcheck" entry is used for the build commands.
# The "signfile" entry is used to remember the sign file that stores the
# signatures for this target.
# The entry named by "timekey" (see below) holds the time of the last update.
# "old_signatures" is for the signatures when we started.
# "upd_signatures" is for the signatures of items for which the build commands
# were successfully executed and are to be stored for the next time.
# Example:
# {"/aa/bb/file.o" : {  "signfile" : "/aa/bb/AAPDIR/sign",
#                       "/aa/bb/file.c@md5" : "13a445e5",
#                       "@buildcheck" : "-O2"},
#  "/aa/bb/bar.o"  : {  "signfile" : "/aa/bb/mysign",
#                       "/aa/bb/bar-debug.c@time" : "143234",
#                       "/aa/bb/bar.h@time" : "423421"}}
old_signatures = {}
upd_signatures = {}

# "new_signatures" caches the signatures we computed this invocation.  It is a
# dictionary of dictionaries:
#   new_signatures["/path/file"]["md5"] = md5hex("/path/file")
# The key for the toplevel dictionary is the Node name.
# The key for the second level is the check name.  The target name isn't used
# here.
# The special second-level key "cleared" marks an item whose signatures were
# reset by sign_clear().
new_signatures = {}

# "chd_signatures" remembers which files were marked as changed with --changed
# or ":changed".  The value is the "recursive" flag given when marking.
chd_signatures = {}

# Key used for the timestamp on the signature entry.  Used to find the last
# updated entry for published files.
timekey = "lastupdate"

# Name for the sign file relative to the directory of the target or the recipe.
sign_normal_fname = in_aap_dir("sign")
sign_normal_fname_len = len(sign_normal_fname)

# Remember which sign files have been read.
# Also when the file couldn't actually be read, so that we remember to write
# this file when signs have been updated.
# An entry exists when the file has been read.  Its value is non-zero when the
# file should be written back.
sign_files = {}

def get_sign_file(recdict, target, update):
    """Make sure the sign file belonging to "target" has been read.

       The file is read at most once; later calls only look at "update".
       When "update" is non-zero the sign file is flagged as needing to be
       written back by sign_write_all()."""
    sign_fname = fname_fold(target.get_sign_fname())

    if sign_files.has_key(sign_fname):
        # Read before: the only thing left to do is raising the write flag.
        if update:
            sign_files[sign_fname] = 1
        return

    # Register the file before reading it, so that it counts as "read" even
    # when sign_read() cannot open it.
    sign_files[sign_fname] = update
    sign_read(recdict, sign_fname)


def sign_file_dir(fname):
    """Return the directory that names stored in sign file "fname" are
       relative to, passed through fname_fold().

       For the standard sign file name ("sign_normal_fname", which lives in
       the A-A-P directory) two path components are dropped, for any other
       sign file only the file name itself."""
    parent = os.path.dirname(fname)
    if len(fname) >= sign_normal_fname_len:
        tail = fname[-sign_normal_fname_len:]
        if fname_fold(tail) == fname_fold(sign_normal_fname):
            # "dir/AAPDIR/sign": go up one more level.
            return fname_fold(os.path.dirname(parent))
    return fname_fold(parent)


# In the sign files, file names are stored with a leading "-" for a virtual
# node and "=" for a file name.  Expand to an absolute name for non-virtual
# nodes.
def sign_expand_name(recdict, dir, name):
    """Turn "name" as stored in a sign file back into a node name.

    "dir" is the directory the sign file entries are relative to (see
    sign_file_dir()).  The first character of "name" is the type marker: with
    '-' the remainder is returned unmodified, otherwise the remainder is made
    absolute relative to "dir" unless it already is absolute.
    An error message is given through msg_error() when "name" has more
    leading "../" items than "dir" has components."""
    rest = name[1:]
    if name[0] == '-' or os.path.isabs(rest):
        return rest

    # No leading "../": a plain join does the job.
    rest_len = len(rest)
    if rest_len <= 3 or rest[:3] != "../":
        return fname_fold(os.path.join(dir, rest))

    # Strip one trailing component from "dir" for every leading "../" of the
    # name.  os.path.normpath() is avoided on purpose, it's a bit slow (it
    # does more than removing ".." items).
    dir_end = len(dir)
    skip = 3
    while 1:
        dir_end = string.rfind(dir, "/", 0, dir_end)
        if dir_end < 0:
            # "cannot happen": sign file corrupted?  Return the name with the
            # ".." (equivalent to sign not found).
            msg_error(recdict, _('In sign file: Too many ".." in "%s" for directory "%s"') % (name, dir))
            return fname_fold(os.path.join(dir, rest))

        another_up = (skip + 3 < rest_len and rest[skip:skip + 3] == "../")
        if not another_up:
            return dir[:dir_end + 1] + rest[skip:]
        skip = skip + 3

def sign_reduce_name(dir, name):
    """Return "name" in the form that is stored in a sign file.

    A name that is not an absolute path gets a leading '-'.  An absolute
    path gets a leading '=' and is passed through shorten_name() with "dir"
    (presumably making it relative to "dir"; see Util.shorten_name())."""
    if not os.path.isabs(name):
        return '-' + fname_fold(name)
    shortened = shorten_name(name, dir)
    return '=' + fname_fold(shortened)


#
# A sign file stores the signatures for items (sources and targets) with the
# values they had when they were computed in the past.
# The format of each line is:
#       =foo.o<ESC>=foo.c@md5_c=012346<ESC>...<ESC>\n
# "md5_c" can be "md5", "time", etc.  Note that it's not always equal to
# the "check" attribute, both "time" and "older" use "time" here.

def sign_read(recdict, fname):
    """Read the signature file "fname" into our dictionary of signatures."""
    basedir = sign_file_dir(fname)
    try:
        f = open(fname, "rb")
        for line in f.readlines():
            e = string.find(line, "\033")
            if e > 0:   # Only use lines with an ESC
                name = sign_expand_name(recdict, basedir, line[:e])
                old_signatures[name] = {"signfile" : fname_fold(fname)}
                while 1:
                    s = e + 1
                    e = string.find(line, "\033", s)
                    if e < 1:
                        break
                    i = string.rfind(line, "=", s, e)
                    if i < 1:
                        break
                    old_signatures[name][sign_expand_name(recdict,
                                           basedir, line[s:i])] = line[i + 1:e]
        f.close()
    except StandardError, e:
        # TODO: handle errors?  It's not an error if the file does not exist.
        msg_note(recdict, (_('Cannot read sign file "%s": ')
                                               % shorten_name(fname)) + str(e))


def sign_write_all(recdict):
    """Write every sign file that was flagged for writing in "sign_files"."""

    # There is no locking: we assume to be the only one updating a signature
    # file.  Sharing it with others wouldn't make sense, since building would
    # fail as well.
    for fname, needs_writing in sign_files.items():
        if needs_writing:
            sign_write(recdict, fname)

def sign_write(recdict, fname):
    """Write one updated signature file."""
    sign_dir = os.path.dirname(fname)
    if not os.path.exists(sign_dir):
        try:
            os.makedirs(sign_dir)
        except StandardError, e:
            msg_warning(recdict,
                        (_('Cannot create directory for signature file "%s": ')
                                                             % fname) + str(e))
    try:
        f = open(fname, "wb")
    except StandardError, e:
        msg_warning(recdict,
                          (_('Cannot open signature file for writing: "%s": ')
                              % fname) + str(e))
        return

    def write_sign_line(f, basedir, s, old, new):
        """Write a line to sign file "f" in directory "basedir" for item "s",
        with checks from "old", using checks from "new" if they are present."""
        f.write(sign_reduce_name(basedir, s) + "\033")

        # Go over all old checks, write all of them, using the new value
        # if it is available.
        for c in old.keys():
            if c != "signfile":
                if new and new.has_key(c):
                    val = new[c]
                else:
                    val = old[c]
                f.write("%s=%s\033" % (sign_reduce_name(basedir, c), val))

        # Go over all new checks, write the ones for which there is no old
        # value.
        if new:
            for c in new.keys():
                if c != "signfile" and not old.has_key(c):
                    f.write("%s=%s\033" % (sign_reduce_name(basedir, c),
                                                                       new[c]))

        f.write("\n")

    basedir = sign_file_dir(fname)
    try:
        # Go over all old signatures, write all of them, using checks from
        # upd_signatures when they are present.
        # When the item is in upd_signatures, use the directory specified
        # there, otherwise use the directory of old_signatures.
        for s in old_signatures.keys():
            if upd_signatures.has_key(s):
                if upd_signatures[s]["signfile"] != fname:
                    continue
                new = upd_signatures[s]
            else:
                if old_signatures[s]["signfile"] != fname:
                    continue
                new = None
            write_sign_line(f, basedir, s, old_signatures[s], new)


        # Go over all updated signatures, write only the ones for which there
        # is no old signature.
        for s in upd_signatures.keys():
            if (not old_signatures.has_key(s)
                                   and upd_signatures[s]["signfile"] == fname):
                write_sign_line(f, basedir, s, upd_signatures[s], None)

        f.close()
    except StandardError, e:
        msg_warning(recdict, (_('Write error for signature file "%s": '),
                                                               fname) + str(e))

def hexdigest(m):
    """Return the digest of md5 object "m" as a string of lowercase hex
    characters, two for each character that m.digest() returns."""
    # NOTE:  The md5 object only has a hexdigest() method of its own since the
    # Python 2.0 interface of the native md5 module, not in Python 1.5.
    result = ''
    for ch in m.digest():
        result = result + ("%02x" % ord(ch))
    return result


def check_md5(recdict, fname, msg = 1):
    if not os.path.isfile(fname):
        # A non-existing file isn't that bad, could be a virtual target that
        # wasn't marked as being virtual.
        if msg:
            msg_note(recdict,
                  _('Cannot compute md5 checksum for "%s": it does not exist')
                  % fname)
        return "unknown"

    try:
        f = open(fname, "rb")
        m = hashlib.md5()
        while 1:
            # Read big blocks at a time for speed, but don't read the whole
            # file at once to reduce memory usage.
            data = f.read(32768)
            if not data:
                break
            m.update(data)
        f.close()
        res = hexdigest(m)
    except StandardError, e:
        if msg:
            msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        res = "unknown"
    return res


def check_c_md5(recdict, fname):
    """Compute an md5 signature after filtering out irrelevant items for C
       code (white space and comments)."""
    try:
        f = open(fname)
    except StandardError, e:
        # Can't open a URL here.
        msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        return "unknown"

    m = hashlib.md5()

    inquote = 0
    incomment = 0
    while 1:
        # Read one line at a time.
        try:
            data = f.readline()
        except StandardError, e:
            # Can't read the file.
            msg_warning(recdict, (_('Cannot read "%s": ') % fname) + str(e))
            return "unknown"

        if not data:
            break

        # Filter out irrelevant changes:
        # - Collapse sequences of white space into one space.
        # - Remove comments.
        # TODO: double-byte characters may have a backslash or double quote
        # as their second byte, how to know this?
        data_len = len(data) - 1
        s = 0
        skipwhite = 1
        i = 0
        while i < data_len:
            if inquote:
                # Only need to search for the endquote.
                while i < data_len:
                    c = data[i]
                    i = i + 1
                    if c == '"':
                        inquote = 0
                        break
                    elif c == '\\':
                        i = i + 1
                continue

            if incomment:
                # Only need to search for the comment end "*/".
                while i < data_len:
                    if data[i] == '*' and data[i + 1] == '/':
                        incomment = 0
                        i = i + 2
                        s = i
                        skipwhite = 1
                        break
                    i = i + 1
                continue

            c = data[i]
            if c == ' ' or c == '\t':
                # White space after non-white: dump text.
                if not skipwhite:
                    m.update(data[s:i] + ' ')

                # Skip white space
                while 1:
                    i = i + 1
                    if i == data_len:
                        break
                    c = data[i]
                    if c != ' ' and c != '\t':
                        break
                s = i
                skipwhite = 0
                if i == data_len:
                    break

            if c == '/' and (data[i + 1] == '/' or data[i + 1] == '*'):
                # Start of // or /* comment.
                if i > s:
                    m.update(data[s:i] + ' ')
                i = i + 1
                if data[i] == '/':
                    s = data_len
                    break
                incomment = 1
            else:
                skipwhite = 0
                if c == "'":
                    # skip '"' or '\'', not the start of a sting
                    if data[i + 1] == '\\':
                        i = i + 1
                    i = i + 2
                elif c == '"':
                    inquote = 1
            i = i + 1

        if not (incomment or skipwhite) and s < data_len:
            m.update(data[s:data_len] + ' ')

    try:
        f.close()
    except:
        # Error while closing a read file???
        pass

    return hexdigest(m)


def buildcheckstr2sign(str):
    """Return the buildcheck signature for string "str": its md5 checksum
    as a string of hex characters."""
    checksum = hashlib.md5()
    checksum.update(str)
    return hexdigest(checksum)


def _sign_lookup(signatures, name, key):
    """
    Get the "key" signature for item "name" from dictionary "signatures".
    "name" must have gone through fname_fold().
    """
    if not signatures.has_key(name):
        return ''
    s = signatures[name]
    if not s.has_key(key):
        return ''
    return s[key]


def sign_clear(name):
    """
    Throw away the signatures computed for item "name" in this invocation.
    The only thing left in its "new_signatures" entry is the "cleared"
    marker, which get_new_sign() checks for.
    Used when the item has been built.
    """
    new_signatures[name] = {"cleared": 1}


def get_new_sign(recdict, name, check, force = 0):
    """Return the current "check" signature of item "name" as a string (also
       for timestamps).

       "name" is the absolute name for non-virtual nodes and may be a URL.
       The result does not depend on a target.  It is computed once and then
       remembered in "new_signatures".
       Supported checks are "time", "md5" and "c_md5", for anything else
       "unknown" is returned.
       When "force" is non-zero also use a cleared signature (for --touch)."""
    name = fname_fold(name)

    # When not executing build commands and a target has been pretended to be
    # build, its signature is cleared.  Don't recompute it then, the file will
    # not be different but we do want a different signature.
    if not force and skip_commands():
        if new_signatures.has_key(name):
            if new_signatures[name].has_key("cleared"):
                return "cleared"

    # Use the remembered value when there is one.
    # NOTE(review): "name" already went through fname_fold() above; the
    # second call is kept from the original code.
    cached = _sign_lookup(new_signatures, fname_fold(name), check)
    if cached:
        return cached

    # Compute the signature now
    if check == "time":
        from Remote import url_time
        res = str(url_time(recdict, name))
    elif check == "md5":
        res = check_md5(recdict, name)
    elif check == "c_md5":
        res = check_c_md5(recdict, name)
    # TODO: other checks, defined with actions
    else:
        res = "unknown"

    # Store the new signature to avoid recomputing it many times.
    if not new_signatures.has_key(name):
        new_signatures[name] = {}
    new_signatures[name][check] = res
    return res

def sign_clear_target(recdict, target):
    """Forget the old and the updated signatures of node "target" and flag
       its sign file for writing.  To be called after successfully executing
       the build rules for "target"; sign_updated() should be called next for
       each source."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    for sigdict in (old_signatures, upd_signatures):
        if sigdict.has_key(target_name):
            del sigdict[target_name]


def sign_clear_file(fname, recursive):
    """Mark file "fname" as changed, remembering the "recursive" flag, and
       drop its updated signatures.
       Used for ":changed" and "--changed=FILE"."""
    # NOTE(review): the changed mark is stored under full_fname(fname) while
    # the updated signatures are looked up with "fname" as given -- confirm
    # that callers pass a name that matches the keys of upd_signatures.
    chd_signatures[full_fname(fname)] = recursive
    try:
        del upd_signatures[fname]
    except KeyError:
        # No updated signatures for this file.
        pass


def sign_clear_all():
    """Start with a clean slate: replace all signature dictionaries and the
       list of read sign files with new, empty dictionaries.  Used when
       starting to execute a toplevel recipe."""
    global old_signatures, upd_signatures
    global new_signatures, chd_signatures
    global sign_files

    # Every name gets its own dictionary, they must not share one.
    sign_files = {}
    new_signatures = {}
    chd_signatures = {}
    upd_signatures = {}
    old_signatures = {}


def _sign_upd_sign(recdict, target, key, value):
    """Store "value" under "key" in the updated signatures of node "target".

    The sign file of the target is read if needed and flagged for writing.
    The entry also gets the current time under "timekey"."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())

    entry = upd_signatures.get(target_name)
    if entry is None:
        # First update for this target: remember where it is to be stored.
        entry = {"signfile": fname_fold(target.get_sign_fname())}
        upd_signatures[target_name] = entry

    entry[key] = value
    # Update the timestamp on the target.
    entry[timekey] = str(time.time())


def sign_updated(recdict, source, dict, target):
    """Remember the current signature of node "source" for node "target".
    To be called after successfully executing the build rules for "target".
    The check to use is obtained from dictionary "dict" and the attributes of
    "source", see check_name()."""
    source_name = source.get_name()
    check = check_name(recdict, source_name, dict, source.attributes)
    signature = get_new_sign(recdict, source_name, check, force = 1)
    # NOTE(review): the key uses the name as returned by get_name(), while
    # get_old_sign() builds its key from the fname_fold()ed name -- confirm
    # that these always match.
    _sign_upd_sign(recdict, target, source_name + '@' + check, signature)

    # if the source file was considered changed and recursive attribute used,
    # the target should be as well.
    if not chd_signatures.get(fname_fold(source_name)):
        return
    chd_signatures[fname_fold(target.get_name())] = 1


def buildcheck_updated(recdict, target, value):
    """Remember "value" as the new buildcheck signature of node "target".
       To be called after successfully executing the build rules for
       "target"."""
    buildcheck_key = '@buildcheck'
    _sign_upd_sign(recdict, target, buildcheck_key, value)


def get_old_sign(recdict, name, check, target, rootname = None):
    """Return the remembered "check" signature of item "name" for target node
       "target".
       "name" must be an absolute and normalized path.
       "changed" is returned when the item was marked as changed.  An empty
       string is returned when no signature was remembered.
       "rootname" is used for publishing and the "--contents" option: when
       given, "target" is only used to find the sign file and all entries
       whose name starts with "rootname" are searched for the most recently
       updated signature."""
    name = fname_fold(name)

    # Check if this file was marked as changed.
    if chd_signatures.has_key(name):
        return "changed"

    # May need to read the sign file for this target.
    get_sign_file(recdict, target, 0)

    key = name + '@' + check
    if not rootname:
        # Use the updated signature if it exists, otherwise use the old one.
        target_name = fname_fold(target.get_name())
        return (_sign_lookup(upd_signatures, target_name, key)
                or _sign_lookup(old_signatures, target_name, key))

    # Go through all updated and old signatures to check if "rootname" matches.
    # Find the entry that was updated most recently.
    rootname = fname_fold(rootname)
    rootname_len = len(rootname)
    found = ''
    newest = 0
    for sigdict in (upd_signatures, old_signatures):
        for node_name, entry in sigdict.items():
            # The name must be longer than "rootname" and start with it.
            if len(node_name) <= rootname_len:
                continue
            if node_name[:rootname_len] != rootname:
                continue
            # Only entries with both the signature and a timestamp count.
            if not (entry.has_key(key) and entry.has_key(timekey)):
                continue
            if float(entry[timekey]) > newest:
                found = entry[key]
                newest = float(entry[timekey])

    return found


def check_name(recdict, name, itemdict, altdict = None):
    """Return the name of the check to be used for item "name".

       A "check" entry in "itemdict" is used first, then one in "altdict" if
       given (attributes of the node).  Without either, a directory gets
       "none" and anything else the value of $DEFAULTCHECK."""
    if itemdict.has_key("check"):
        return itemdict["check"]
    if altdict and altdict.has_key("check"):
        return altdict["check"]

    # TODO: make mapping from name or filetype to check configurable
    #if itemdict.has_key("filetype"):
    #    type = itemdict["filetype"]
    #else:
    #    type = ft_detect(itemdict["name"])
    is_directory = (itemdict.get("directory")
                    or (altdict and altdict.get("directory"))
                    or os.path.isdir(name))
    if is_directory:
        # default check for directories: none
        return "none"

    # default check is given with $DEFAULTCHECK
    return get_var_val_int(recdict, "DEFAULTCHECK")


# vim: set sw=4 et sts=4 tw=79 fo+=l:
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.