Filetype.py :  » Build » A-A-P » aap-1.091 » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Build » A A P 
A A P » aap 1.091 » Filetype.py
#! /usr/bin/env python
#
# Part of the A-A-P project: File type detection module

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

# This module detects the type of a file.
# It can be run as a separate program or called from Python.
# Many types are recognized by default.  More types can be added dynamically.
# See the Aap reference manual for an explanation.
#
#
# EXTERNAL INTERFACE:
#
# ft_detect(fname [, ignore] [, recdict])
#                           Detects the type of file "fname".
#
# ft_check_dir(dir [, errmsg] [, recdict])
#                           Scan directory "dir" for "*.afd" files, which are
#                           loaded with ft_read_file().
#
# ft_read_file(fname [, recdict])
#                           Read file "fname" for detection rules.
#
# ft_add_rules(str, lnum [, recdict])
#                           Add file type detection rules from "str".  See
#                           the Aap reference manual for the syntax.
#
# ft_known(type)            Returns True if "type" is a known filetype, False
#                           otherwise.
#
# ft_declare(type)          Declare "type" to be a known filetype.
#

import string
import os.path
import sys

import import_re        # import the re module in a special way

import glob

from Util import *

# Make a copy of the recdict after these imports, so that they can be used when
# executing Python snippets.
exec_recdict = globals().copy()

# Set to non-zero when run as a program.
_run_as_program = 0

#
# The default list of detected file types by suffix.
#
_def_suffix_list = [
        # Each entry is (file name suffix, filetype).  __init__() passes this
        # list to _add_suffixlist().
        # Fixed: "rmpnew" was a typo for "rpmnew"; the second ("rbw", "ruby")
        # and ("py", "python") entries were duplicates that left "rb" and
        # "pyw" undetected; other exact duplicates were dropped.
        ("aap", "aap"),
        ("abc", "abc"),
        ("abl", "abel"),
        ("wrm", "acedb"),
        ("ada", "ada"),
        ("adb", "ada"),
        ("ads", "ada"),
        ("afd", "afd"),
        ("tdf", "ahdl"),
        ("aml", "aml"),
        ("run", "ampl"),
        ("a", "asm"),
        ("asm", "asm"),
        ("lst", "asm"),
        ("mac", "asm"),
        ("s", "asm"),
        ("asn", "asn"),
        ("asn1", "asn"),
        ("asa", "aspvbs"),
        ("as", "atlas"),
        ("atl", "atlas"),
        ("ave", "ave"),
        ("awk", "awk"),
        ("imp", "b"),
        ("mch", "b"),
        ("ref", "b"),
        ("bc", "bc"),
        ("bdf", "bdf"),
        ("bib", "bib"),
        ("bl", "blank"),
        ("btm", "btm"),
        ("c", "c"),
        ("cdl", "cdl"),
        ("cfi", "cf"),
        ("cfm", "cf"),
        ("chs", "chaskell"),
        ("eni", "cl"),
        ("dcl", "clean"),
        ("icl", "clean"),
        ("prg", "clipper"),
        ("cbl", "cobol"),
        ("cob", "cobol"),
        ("cpy", "cobol"),
        ("c++", "cpp"),
        ("cc", "cpp"),
        ("cpp", "cpp"),
        ("cxx", "cpp"),
        ("h", "cpp"),
        ("hh", "cpp"),
        ("hpp", "cpp"),
        ("hxx", "cpp"),
        ("inl", "cpp"),
        ("tcc", "cpp"),
        ("cs", "cs"),
        ("csc", "csc"),
        ("csh", "csh"),
        ("tcsh", "csh"),
        ("csp", "csp"),
        ("fdr", "csp"),
        ("css", "css"),
        ("con", "cterm"),
        ("pld", "cupl"),
        ("si", "cuplsim"),
        ("cyn", "cynpp"),
        ("d", "d"),
        ("def", "def"),
        ("desc", "desc"),
        ("diff", "diff"),
        ("patch", "diff"),
        ("rej", "diff"),
        ("bat", "dosbatch"),
        ("cmd", "dosbatch"),
        ("sys", "dosbatch"),
        ("ini", "dosini"),
        ("dot", "dot"),
        ("drac", "dracula"),
        ("drc", "dracula"),
        ("dsl", "dsl"),
        ("dtd", "dtd"),
        ("dylan", "dylan"),
        ("intr", "dylanintr"),
        ("lid", "dylanlid"),
        ("ecd", "ecd"),
        ("am", "elf"),
        ("erl", "erlang"),
        ("EC", "esqlc"),
        ("ec", "esqlc"),
        ("exp", "expect"),
        ("4gh", "fgl"),
        ("4gl", "fgl"),
        ("m4gl", "fgl"),
        ("fex", "focexec"),
        ("focexec", "focexec"),
        ("fs", "forth"),
        ("ft", "forth"),
        ("F", "fortran"),
        ("f", "fortran"),
        ("f77", "fortran"),
        ("f90", "fortran"),
        ("f95", "fortran"),
        ("for", "fortran"),
        ("fpp", "fortran"),
        ("ftn", "fortran"),
        ("gdmo", "gdmo"),
        ("mo", "gdmo"),
        ("ged", "gedcom"),
        ("gif", "gif"),
        ("gpi", "gnuplot"),
        ("gp", "gp"),
        ("gsp", "gsp"),
        ("hs", "haskell"),
        ("hb", "hb"),
        ("errsum", "hercules"),
        ("ev", "hercules"),
        ("rs", "hercules"),
        ("sum", "hercules"),
        ("vc", "hercules"),
        ("h32", "hex"),
        ("hex", "hex"),
        ("hog", "hog"),
        ("rules", "hog"),
        ("htm", "html"),
        ("html", "html"),
        ("shtml", "html"),
        ("html.m4", "htmlm4"),
        ("icn", "icon"),
        ("idl", "idl"),
        ("Z", "ignore"),
        ("bak", "ignore"),
        ("bz2", "ignore"),
        ("gz", "ignore"),
        ("in", "ignore"),
        ("new", "ignore"),
        ("old", "ignore"),
        ("orig", "ignore"),
        ("rpmnew", "ignore"),
        ("rpmsave", "ignore"),
        ("indent.pro", "indent"),
        ("INF", "inform"),
        ("inf", "inform"),
        ("iss", "iss"),
        ("ist", "ist"),
        ("mst", "ist"),
        ("jpl", "jam"),
        ("jpr", "jam"),
        ("jav", "java"),
        ("java", "java"),
        ("jj", "javacc"),
        ("jjt", "javacc"),
        ("javascript", "javascript"),
        ("js", "javascript"),
        ("clp", "jess"),
        ("jgr", "jgraph"),
        ("jpg", "jpeg"),
        ("png", "png"),
        ("properties", "jproperties"),
        ("jsp", "jsp"),
        ("kix", "kix"),
        ("ks", "kscript"),
        ("k", "kwt"),
        ("ACE", "lace"),
        ("ace", "lace"),
        ("latte", "latte"),
        ("lte", "latte"),
        ("l", "lex"),
        ("lex", "lex"),
        ("lhs", "lhaskell"),
        ("ll", "lexpp"),
        ("cl", "lisp"),
        ("el", "lisp"),
        ("jl", "lisp"),
        ("lisp", "lisp"),
        ("lsp", "lisp"),
        ("lite", "lite"),
        ("lt", "lite"),
        ("lgt", "logtalk"),
        ("lot", "lotos"),
        ("lotos", "lotos"),
        ("lou", "lout"),
        ("lout", "lout"),
        ("sig", "lprolog"),
        ("lss", "lss"),
        ("lua", "lua"),
        ("mc", "m4"),
        ("eml", "mail"),
        ("dsp", "make"),
        ("mak", "make"),
        ("mk", "make"),
        ("man", "man"),
        ("mpl", "maple"),
        ("mv", "maple"),
        ("mws", "maple"),
        ("mason", "mason"),
        ("mhtml", "mason"),
        ("mel", "mel"),
        ("mf", "mf"),
        ("mgp", "mgp"),
        ("mib", "mib"),
        ("mms", "mmix"),
        ("moc", "moc"),
        ("DEF", "modula2"),
        ("MOD", "modula2"),
        ("m2", "modula2"),
        ("md", "modula2"),
        ("mi", "modula2"),
        ("i3", "modula3"),
        ("ig", "modula3"),
        ("m3", "modula3"),
        ("mg", "modula3"),
        ("isc", "monk"),
        ("monk", "monk"),
        ("ssc", "monk"),
        ("tsc", "monk"),
        ("moo", "moo"),
        ("mp", "mp"),
        ("msql", "msql"),
        ("mush", "mush"),
        ("mysql", "mysql"),
        # NOTE(review): these are the only suffixes written with a leading
        # dot; confirm that _add_suffixlist() expects that form.
        (".NSA", "natural"),
        (".NSC", "natural"),
        (".NSG", "natural"),
        (".NSL", "natural"),
        (".NSM", "natural"),
        (".NSN", "natural"),
        (".NSP", "natural"),
        (".NSS", "natural"),
        ("ncf", "ncf"),
        ("nqc", "nqc"),
        ("OPL", "opl"),
        ("OPl", "opl"),
        ("Opl", "opl"),
        ("dpr", "pascal"),
        ("g", "pccts"),
        ("inc", "php"),
        ("ml", "ocaml"),
        ("mli", "ocaml"),
        ("mll", "ocaml"),
        ("mly", "ocaml"),
        ("mm", "nroff"),
        ("nr", "nroff"),
        ("nsi", "nsis"),
        ("o", "object"),
        ("obj", "object"),
        ("opl", "opl"),
        ("or", "openroad"),
        ("ora", "ora"),
        ("papp", "papp"),
        ("pas", "pascal"),
        ("php", "php"),
        ("pl", "perl"),
        ("pxml", "papp"),
        ("pxsl", "papp"),
        ("roff", "nroff"),
        ("sho", "dllobject"),
        ("sob", "dllobject"),
        ("tr", "nroff"),
        ("xin", "omnimark"),
        ("xom", "omnimark"),
        ("php3", "php"),
        ("phtml", "phtml"),
        ("lpc", "pike"),
        ("pike", "pike"),
        ("pmod", "pike"),
        ("ulpc", "pike"),
        ("rcp", "pilrc"),
        ("p36", "plm"),
        ("pac", "plm"),
        ("plm", "plm"),
        ("plp", "plp"),
        ("pls", "plsql"),
        ("plsql", "plsql"),
        ("po", "po"),
        ("pod", "pod"),
        ("eps", "postscript"),
        ("ps", "postscript"),
        ("pov", "pov"),
        ("ppd", "ppd"),
        ("ih", "ppwiz"),
        ("it", "ppwiz"),
        ("pdb", "prolog"),
        ("psf", "psf"),
        ("py", "python"),
        ("pyw", "python"),
        ("mat", "radiance"),
        ("rad", "radiance"),
        ("rc", "rc"),
        ("rex", "rexx"),
        ("rexx", "rexx"),
        ("x", "rpcgen"),
        ("rpl", "rpl"),
        ("rtf", "rtf"),
        ("rb", "ruby"),
        ("rbw", "ruby"),
        ("sas", "sas"),
        ("sa", "sather"),
        ("scm", "scheme"),
        ("sci", "scilab"),
        ("pdl", "sdl"),
        ("pr", "sdl"),
        ("sed", "sed"),
        ("sgm", "sgml"),
        ("sgml", "sgml"),
        ("bash", "sh"),
        ("ebuild", "sh"),
        ("env", "sh"),
        ("ksh", "sh"),
        ("sh", "sh"),
        ("sim", "simula"),
        ("s85", "sinda"),
        ("sin", "sinda"),
        ("il", "skill"),
        ("sl", "slang"),
        ("score", "slrnsc"),
        ("tpl", "smarty"),
        ("smith", "smith"),
        ("smt", "smith"),
        ("sml", "sml"),
        ("sno", "snobol4"),
        ("spec", "spec"),
        ("sp", "spice"),
        ("spice", "spice"),
        ("spd", "spup"),
        ("spdata", "spup"),
        ("speedup", "spup"),
        ("pkb", "sql"),
        ("pks", "sql"),
        ("sql", "sql"),
        ("tyb", "sql"),
        ("tyc", "sql"),
        ("typ", "sql"),
        ("sqlj", "sqlj"),
        ("sqi", "sqr"),
        ("sqr", "sqr"),
        ("s19", "srec"),
        ("s28", "srec"),
        ("s37", "srec"),
        ("cls", "st"),
        ("st", "st"),
        ("stp", "stp"),
        ("tak", "tak"),
        ("itcl", "tcl"),
        ("itk", "tcl"),
        ("tar", "tar"),
        ("tar.bz2", "tarbz2"),
        ("tar.gz", "targz"),
        ("tgz", "targz"),
        ("tcl", "tcl"),
        ("tk", "tcl"),
        ("ti", "terminfo"),
        ("dtx", "tex"),
        ("latex", "tex"),
        ("ltx", "tex"),
        ("sty", "tex"),
        ("tex", "tex"),
        ("texi", "texinfo"),
        ("texinfo", "texinfo"),
        ("txi", "texinfo"),
        ("tf", "tf"),
        ("t.html", "tilde"),
        ("tli", "tli"),
        ("slt", "tsalt"),
        ("tsscl", "tsscl"),
        ("tssgm", "tssgm"),
        ("tssop", "tssop"),
        ("uc", "uc"),
        ("ui", "ui"),
        ("uil", "uil"),
        ("uit", "uil"),
        ("ctl", "vb"),
        ("dsm", "vb"),
        ("sba", "vb"),
        ("vbs", "vb"),
        ("v", "verilog"),
        ("hdl", "vhdl"),
        ("vbe", "vhdl"),
        ("vhd", "vhdl"),
        ("vhdl", "vhdl"),
        ("vst", "vhdl"),
        ("vim", "vim"),
        ("hw", "virata"),
        ("module", "virata"),
        ("pkg", "virata"),
        ("wrl", "vrml"),
        ("wm", "webmacro"),
        ("wbt", "winbatch"),
        ("wml", "wml"),
        ("doc", "word"),
        ("wsc", "wsh"),
        ("wsf", "wsh"),
        ("ad", "xdefaults"),
        ("msc", "xmath"),
        ("msf", "xmath"),
        ("xpm2", "xpm2"),
        ("xs", "xs"),
        ("xsd", "xsd"),
        ("xsl", "xslt"),
        ("y", "yacc"),
        ("yy", "yaccpp"),
        ("zip", "zip"),
        ("z8a", "z8a"),
]

#
# The default list of detected file types by regexp.
# The order matters here!  Last item is checked first.
#
_def_regexp_list = [
        # Each entry is (regexp, filetype, flag).  __init__() passes this list
        # to _add_regexplist().
        # NOTE(review): the third item is presumably the "tail" flag (match
        # the last path component only) -- confirm in _add_regexplist().
        #
        # Fixed: a stray backslash before a literal letter turned it into a
        # regexp escape ("\t" TAB, "\r" CR, "\a" BEL, "\d" digit, "\w" word
        # character) so that e.g. "tidyrc", "ratpoisonrc" and "ae123.txt" were
        # never matched; "\p", "\m" and "\_" are rejected by newer versions of
        # the re module.  "\\.lrnrc" was a typo for ".slrnrc".
        ("[cC]hange[lL]og", "changelog", 1),
        ("/var/named/", "bindzone", 0),
        ("crontab", "crontab", 1),
        (".*drac\\.", "dracula", 0),
        (".*fvwmrc", "fvwm", 0),
        (".*fvwm95", "fvwm", 0),
        (".*fvwm2rc", "fvwm", 0),
        ("\\.gtkrc", "gtkrc", 1),
        ("gtkrc", "gtkrc", 1),
        ("Prl.*\\.", "jam", 1),
        ("JAM.*\\.", "jam", 1),
        ("[mM]akefile", "make", 1),
        ("muttrc", "muttrc", 1),
        ("tmac\\.", "nroff", 1),
        (".*printcap", "printcap", 0),
        (".*termcap", "termcap", 0),
        (".*vimrc", "vim", 0),
        ("Xresources", "xdefaults", 1),
        (".*/app-defaults/", "xdefaults", 0),
        (".*/Xresources/", "xdefaults", 0),
        ("XF86Config", "xf86conf", 1),
        (".*xmodmap", "xmodmap", 0),
        ("zsh", "zsh", 1),
        ("zlog", "zsh", 1),
        ("xdm-config$", "xdefaults", 1),
        ("\\.Xresources$", "xdefaults", 1),
        ("\\.Xpdefaults$", "xdefaults", 1),
        ("\\.Xdefaults$", "xdefaults", 1),
        ("XF86Config$", "xf86conf", 1),
        ("cvs\\d+$", "cvs", 1),
        ("wvdial\\.conf$", "wvdial", 1),
        ("wgetrc$", "wget", 1),
        ("\\.wgetrc$", "wget", 1),
        ("vgrindefs$", "vgrindefs", 1),
        ("\\.viminfo", "viminfo", 1),
        ("_viminfo", "viminfo", 1),
        (".*\\.vhdl_[0-9]*$", "vhdl", 0),
        ("tidyrc$", "tidy", 1),
        ("\\.tidyrc$", "tidy", 1),
        ("texmf\\.cnf$", "texmf", 1),
        ("tags$", "tags", 1),
        ("squid\\.conf$", "squid", 1),
        ("vision\\.conf$", "hog", 1),
        ("snort\\.conf$", "hog", 1),
        ("\\.slrnrc", "slrnrc", 1),
        ("screenrc$", "screen", 1),
        ("\\.screenrc$", "screen", 1),
        ("\\.zcompdump", "zsh", 1),
        ("\\.zfbfmarks$", "zsh", 1),
        ("\\.zprofile$", "zsh", 1),
        ("\\.zlog", "zsh", 1),
        ("\\.zsh", "zsh", 1),
        ("csh\\.logout$", "csh", 1),
        ("csh\\.login$", "csh", 1),
        ("csh\\.cshrc$", "csh", 1),
        ("\\.alias", "csh", 1),
        ("\\.tcshrc", "csh", 1),
        ("\\.cshrc", "csh", 1),
        ("\\.login", "csh", 1),
        ("\\.profile", "sh", 1),
        ("/etc/profile", "sh", 0),
        ("\\.kshrc", "sh", 1),
        ("\\.bashrc", "sh", 1),
        ("bashrc", "sh", 1),
        ("bash\\.bashrc", "sh", 1),
        ("\\.bash_profile", "sh", 1),
        ("\\.bash_logout", "sh", 1),
        ("sgml\\.catalog", "catalog", 1),
        ("catalog$", "catalog", 1),
        ("sendmail\\.cf", "sendmail", 1),
        ("smb\\.conf", "samba", 1),
        ("robots.txt", "robots", 1),
        ("\\.reminders", "remind", 1),
        ("\\.inputrc$", "readline", 1),
        ("\\.ratpoisonrc$", "ratpoison", 1),
        ("ratpoisonrc$", "ratpoison", 1),
        ("\\.procmail$", "procmail", 1),
        ("\\.procmailrc$", "procmail", 1),
        (".*printcap$", "printcap", 0),
        (".*termcap$", "termcap", 0),
        ("\\.povrayrc$", "povini", 1),
        ("main.cf$", "pfmain", 1),
        ("\\.pinerc$", "pine", 1),
        ("pinerc$", "pine", 1),
        ("\\.muttrc", "muttrc", 1),
        ("\\.mutt/muttrc", "muttrc", 1),
        ("Muttrc$", "muttrc", 1),
        ("[mM]akefile$", "make", 1),
        ("GNUmakefile$", "make", 1),
        ("snd.\\d+$", "mail", 1),
        ("\\.letter$", "mail", 1),
        ("\\.letter\\.\\d+$", "mail", 1),
        ("\\.followup$", "mail", 1),
        ("\\.article$", "mail", 1),
        ("\\.article\\.\\d+$", "mail", 1),
        ("pico\\.\\d+$", "mail", 1),
        ("mutt-.*-\\d+$", "mail", 1),
        ("mutt\\w{6}$", "mail", 1),
        ("ae\\d+\\.txt$", "mail", 1),
        ("/tmp/SLRN[0-9A-Z.]+$", "mail", 0),
        ("\\.emacs$", "lisp", 1),
        ("\\.sawfishrc$", "lisp", 1),
        ("lilo.conf", "lilo", 1),
        ("lftp.conf$", "lftp", 1),
        ("\\.lftprc$", "lftp", 1),
        (".*lftp/rc$", "lftp", 0),
        (".*properties_..$", "jproperties", 0),
        (".*properties_.._..$", "jproperties", 0),
        (".*properties_.._.._.*$", "jproperties", 0),
        ("inittab$", "inittab", 1),
        ("\\.gtkrc$", "gtkrc", 1),
        ("gtkrc$", "gtkrc", 1),
        ("gkrellmrc_.$", "gkrellmrc", 1),
        ("gkrellmrc$", "gkrellmrc", 1),
        ("\\.gdbinit$", "gdb", 1),
        ("fstab$", "fstab", 1),
        ("auto.master$", "conf", 1),
        ("exports$", "exports", 1),
        ("filter-rules$", "elmfilt", 1),
        (".*lvs$", "dracula", 0),
        (".*lpe$", "dracula", 0),
        ("debian/control$", "debcontrol", 1),
        (".*\\.\\.ch$", "ch", 0),
        ("named\\.conf$", "named", 1),
        ("named\\.root$", "bindzone", 1),
        ("build\\.xml$", "ant", 1),
        (".*vimrc$", "vim", 0),
        (".*exrc$", "vim", 0),
        ("configure$", "sh", 1),
        ("configure.ac$", "config", 1),
        (".*COPYING$", "text", 0),
        (".*README$", "text", 0),
        (".*read.me$", "text", 0),
        ("proftpd\\.conf", "apachestyle", 1),
        ("httpd\\.conf", "apache", 1),
        ("srm\\.conf", "apache", 1),
        ("access\\.conf", "apache", 1),
        ("apache\\.conf", "apache", 1),
        ("\\.htaccess$", "apache", 1),
        (".*enlightenment/.*\\.cfg$", "c", 0),
        (".*Eterm/.*\\.cfg$", "eterm", 0),
        ("lynx\\.cfg$", "lynx", 1),
        (".*baseq[2-3]/.*\\.cfg$", "quake", 0),
        (".*id1/.*\\.cfg$", "quake", 0),
        (".*quake[1-3]/.*\\.cfg$", "quake", 0),
        ("crontab$", "crontab", 1),
]

#
# The default list of detected file types by script name.
#
_def_script_list = [
        # Each entry is (regexp, filetype).  __init__() passes this list to
        # _add_scriptlist(); the rules end up in _script_list, whose regexps
        # are matched with the script named in the first line of the file.
        (".*\\bpython", "python"),
        (".*\\bperl", "perl"),
        (".*\\bphp", "php"),
        (".*\\bruby", "ruby"),
        (".*\\bbc\\b", "bc"),
        (".*\\bsed\\b", "sed"),
        (".*\\bocaml", "ocaml"),
        (".*awk\\b", "awk"),
        (".*wml\\b", "wml"),
        (".*\\bksh\\b", "sh"),
        (".*\\bsh\\b", "sh"),
        (".*\\bbash", "sh"),
        (".*csh\\b", "csh"),
        (".*\\bzsh\\b", "zsh"),
        (".*\\btclsh\\b", "tcl"),
        (".*\\bwish\\b", "tcl"),
        (".*\\bexpectk\\b", "tcl"),
        (".*\\bitclsh\\b", "tcl"),
        (".*\\bitwish\\b", "tcl"),
        (".*\\bexpect\\b", "expect"),
        (".*\\bgnuplot\\b", "gnuplot"),
        (".*make\\b", "make"),
]

#
# The default list of detected file types with Python code.
#
_def_python_list = [
        ("am", 0,
"""    # Use Python to avoid the .am suffix is recognized
    if string.lower(fname_base) == "makefile.am":
        type = "automake"
"""),

        ("bas,frm", 0,
"""    if string.lower(fname[-3:]) == "frm":
        type = "form"
    else:
        type = "basic"
    f = open(fname)
    l = ''
    try:
        for i in xrange(1,5):
            l = l + f.readline()
    except:
        pass
    f.close()
    if re.search("VB_Name|Begin VB\\\\.(Form|MDIForm|UserControl)", l, re.I):
        type = "vb"
"""),

        ("ch", 0,
"""    type = "ch"
    f = open(fname)
    try:
        for i in xrange(1,10):
            if f.readline()[0] == '@':
                type = "change"
                break
    except:
        pass
    f.close()
"""),

        ("e,E", 0,
"""    type = "eiffel"
    f = open(fname)
    try:
        for i in xrange(1,100):
            if cre_match("\\\\s*(<'|'>)\\\\s*$", f.readline()):
                type = "specman"
                break
    except:
        pass
    f.close()
"""),

        ("ent", 0,
"""    type = "dtd"
    f = open(fname)
    try:
        for i in xrange(1,6):
            l = f.readline()
            if cre_match("\\\\s*[#{]", l):
                type = "cl"
                break
            if not cre_match("\\\\s*$", l):
                break
    except:
        pass
    f.close()
"""),

        ("rul", 0,
"""    type = "diva"
    f = open(fname)
    try:
        for i in xrange(1,6):
            if string.find("InstallShield", f.readline()):
                type = "ishd"
                break
    except:
        pass
    f.close()
"""),

        ("com", 0,
"""    type = "dcl"
    f = open(fname)
    try:
        l1 = f.readline() + f.readline()
        l2 = f.readline() + f.readline()
        if (cre_search("\\\\$ORIGIN|\\\\$TTL|IN\\\\s*SOA", l1)
                or cre_search("BIND.*named", l1 + l2)):
            type = "dns"
    except:
        pass
    f.close()
"""),

        ("in", 0,
"""    # Use Python to avoid the .in suffix is recognized
    if fname_base == "configure.in":
        type = "config"
"""),

        ("m", 0,
"""    type = "matlab"
    f = open(fname)
    try:
        for i in xrange(1,10):
            l = f.readline()
            if cre_match("\\\\s*#(include|import)", l):
                type = "objc"
                break
            if cre_match("\\\\s*%", l):
                break
            if cre_match("\\\\s*\\\\(\\\\*", l):
                type = "mma"
                break
    except:
        pass
    f.close()
"""),

        ("mod", 0,
"""    type = "modsim3"
    f = open(fname)
    try:
        if cre_search("\\\\bmodule\\\\b", f.readline()):
            type = "lprolog"
    except:
        pass
    f.close()
"""),

        ("1,2,3,4,5,6,7,8,9,t,ms", 0,
"""    f = open(fname)
    found = 0
    try:
        for i in xrange(1,5):
            l = f.readline()
            if not l:
                break
            if l[0] == '.':
                type = "nroff"
                found = 1
                break
    except:
        pass
    f.close()
    if not found:
        if fname[-1] == 't':
            type = "tads"
        elif fname[-1] == 's':
            type = "xmath"
"""),

        ("pl", 0,
"""    type = "perl"
    f = open(fname)
    try:
        while 1:
            l = f.readline()
            if l:
                break
    except:
        pass
    f.close()
    if (cre_search("\\\\bprolog\\\\b|:-", l)
            or cre_match("\\\\s*(%+(\\\\s|$)|/\\\\*)", l)):
        type = "prolog"
"""),

        ("pm", 0,
"""    type = "perl"
    f = open(fname)
    try:
        l = f.readline()
    except:
        pass
    f.close()
    if cre_search("XPM2", l):
        type = "xpm2"
    elif cre_search("XPM", l):
        type = "xpm"
"""),

        ("inc", 0,
"""    type = "php"
    f = open(fname)
    l = ''
    try:
        for i in xrange(1,3):
            l = l + f.readline()
    except:
        pass
    f.close()
    if cre_search("perlscript", l):
        type = "aspperl"
    elif cre_search("<%", l):
        type = "aspvbs"
    elif cre_search("<?", l):
        type = "php"
    else:
        type = "asm"    # could also be "pov", how to check?
"""),

        ("w", 0,
"""    type = "cweb"
    f = open(fname)
    try:
        if cre_search("&ANALYZE", f.readline()):
            type = "progress"
        else:
            f.readline()
            if cre_search("&GLOBAL-DEFINE", f.readline()):
                type = "progress"
    except:
        pass
    f.close()
"""),

        ("i", 0,
"""    type = asm
    f = open(fname)
    found = 0
    try:
        for i in xrange(1,10):
            l = f.readline()
            if l[0] == '*' or cre_match("\\\\s*;", l):
                found = 1
                break
            if not cre_match("\\\\s*$", l) or cre_match("/\\\\*", l):
                break
    except:
        pass
    f.close()
    if not found:
        type = "progress"
"""),

        ("p", 0,
"""    type = "pascal"
    f = open(fname)
    found = 0
    try:
        for i in xrange(1,10):
            l = f.readline()
            if cre_match("\\\\s*((program|procedure|function|const|type|var)\\\\b|{)", l):
                found = 1
                break
            if not cre_match("\\\\s*$", l) or cre_match("/\\\\*", l):
                break
    except:
        pass
    f.close()
    if not found:
        type = "progress"
"""),

        ("reg", 0,
"""    f = open(fname)
    try:
        if cre_match("REGEDIT[0-9]*\\\\s*$", f.readline()):
            type = "registry"
    except:
        pass
    f.close()
"""),

        ("r", 0,
"""    type = "rexx"
    f = open(fname)
    try:
        if cre_match("REBOL", f.readline()):
            type = "rebol"
    except:
        pass
    f.close()
"""),

        ("decl,dcl,dec", 0,
"""    f = open(fname)
    try:
        l = f.readline() + f.readline() + f.readline()
        if cre_match("<!SGML", l):
            type = "sgmldecl"
    except:
        pass
    f.close()
"""),

        ("smil", 0,
"""    type = "smil"
    f = open(fname)
    try:
        if cre_search("<?\\\\s*xml.*?>", f.readline()):
            type = "xml"
    except:
        pass
    f.close()
"""),

        ("smi", 0,
"""    type = "mib"
    f = open(fname)
    try:
        if cre_search("\\\\bsmil\\\\b", f.readline()):
            type = "smil"
    except:
        pass
    f.close()
"""),

        ("web", 0,
"""    type = "winbatch"
    f = open(fname)
    try:
        for i in xrange(0,5):
            if f.readline()[0] == '%':
                type = "web"
                break
    except:
        pass
    f.close()
"""),

        ("xpm", 0,
"""    type = "xpm"
    f = open(fname)
    try:
        if cre_search("XPM2", f.readline()):
            type = "xpm2"
    except:
        pass
    f.close()
"""),

        ("xml", 0,
"""    type = "xml"
"""),

        ("", 0,
"""  while 1:
    if fname == "INDEX" or fname == "INFO":
        f = open(fname)
        try:
            if cre_match("\\\\s*(distribution|installed_software|root|bundle|product)\\\\s*$", f.readline()):
                type = "psf"
                f.close()
                break
        except:
            pass
        f.close()
    if string.find("jarg", fname):
        f = open(fname)
        try:
            for i in xrange(0,5):
                if re.search("THIS IS THE JARGON FILE", f.readline(), re.I):
                    type = "jargon"
                    break
        except:
            pass
        f.close()
    break
"""),

        ("", 1,
"""    if ignore and fname[-1] == '~':
        type = ft_detect(fname[:-1], 1)
"""),

        ("", 1,
"""    f = open(fname)
    line1 = f.readline()
    lines = ['', line1, '', '', '', '']
    for i in xrange(2, 6):
        try:
            lines[i] = f.readline()
        except:
            break
    if line1 and line1[0] == ':' and line1[1] == '\\\\n':
        type = "sh"
    elif cre_match("#(compdef|autoload)\\\\b", line1):
        type = "zsh"
    elif cre_match("From [a-zA-Z][a-zA-Z_0-9\\\\.=-]*(@[^ ]*)? .*[12][09]\\\\d\\\\d$", line1):
        type = "mail"
    elif cre_match("<[%&].*>", line1):
        type = "mason"
    elif cre_match('" *[vV]im$', line1):
        type = "vim"
    elif cre_match("\\\\*\\\\* LambdaMOO Database, Format Version", line1):
        type = "moo"
    elif (cre_match("diff\\\\b|Only in |\\\\d+(,\\\\d+)?[cda]\\\\d+\\\\b|# It was generated by makepatch |Index:\\\\s+\\\\S+$|==== //\\\\S+#\\\\d+", line1)
            or (cre_match("--- ", line1) and cre_match("+++ ", lines[2]))
            or (cre_match("\\\\*\\\\*\\\\* ", line1) and cre_match("--- ", lines[2]))):
        type = "diff"
    elif cre_match("%!\\\\s*PS", line1):
        type = "postscript"
    elif (cre_match("\\\\s*dnl\\\\b", line1)
            or cre_match("\\\\s*dnl\\\\b", lines[2])
            or cre_match("\\\\s*dnl\\\\b", lines[3])
            or cre_match("\\\\s*dnl\\\\b", lines[4])
            or cre_match("\\\\s*dnl\\\\b", lines[5])):
        type = "m4"
    elif re.match(" *proc[nd] *$", line1, re.I):
        type = "sicad"
    elif cre_match("\\\\*\\\\*\\\\* Purify", line1):
        type = "purifylog"
    elif cre_search("<\\\\?\\\\s*xml.*\\\\?>", line1):
        type = "xml"
    elif cre_match("[0-9a-fA-F]{7}: [0-9a-fA-F]{2} [0-9a-fA-F]{2} [0-9a-fA-F]{2} [0-9a-fA-F]{2} ", line1):
        type = "xxd"
    elif cre_match("RCS file:", line1) or cre_match("RCS file:", lines[2]):
        type = "rcslog"
    elif cre_match("CVS:", lines[2]):
        type = "cvs"
    elif cre_match("SEND-PR:", line1):
        type = "sendpr"
    elif cre_match("SNNS network definition file", line1):
        type = "snnsnet"
    elif cre_match("SNNS pattern definition file", line1):
        type = "snnspat"
    elif cre_match("SNNS result file", line1):
        type = "snnsres"
    elif (cre_match("%.*?[Vv]irata", line1)
            or cre_match("%.*?[Vv]irata", lines[2])
            or cre_match("%.*?[Vv]irata", lines[3])
            or cre_match("%.*?[Vv]irata", lines[4])
            or cre_match("%.*?[Vv]irata", lines[5])):
        type = "virata"
    elif cre_match("[0-9]* *execve\\\\(", line1):
        type = "strace"
    elif (cre_search("K & K  Associates", lines[4])
            or cre_search("TAK 2000", lines[2])):
        type = "takout"
    elif cre_search("S Y S T E M S   I M P R O V E D ", lines[3]):
        type = "sindaout"
        # takcmp and sindacmp skipped
    elif (cre_search("\\\\$ORIGIN|\\\\$TTL|IN\\\\s*SOA", line1 + lines[2])
            or cre_search("BIND.*named", line1 + lines[2] + lines[3] + lines[4])):
        type = "dns"
    elif ((cre_search("\\\\|\\\\*{1,80}", line1)
                    and cre_search("VRC ", lines[2]))
            or (cre_search("\\\\|\\\\*{1,80}", lines[2])
                    and cre_search("VRC ", lines[3]))):
        type = "baan"
    elif cre_match("==\\\\d+== valgrind", line1):
        type = "valgrind"
    else:
        line = None
        for i in xrange(1,6):
            if not cre_match("\\\\? ", lines[i]):
                line = lines[i]
                break
        if not line:
            while 1:
                try:
                    l = f.readline()
                    if not cre_match("\\\\? ", l):
                        line = l
                        break
                except:
                    break
        if line and cre_match("Index:\\\\s+\\\\S+$", line):
            type = "diff"
    f.close()
"""),

        ("mas,master", 1,
"""    type = "master"
"""),

        ("m4", 1,
"""    type = "m4"
"""),

        ("me", 1,
"""    type = "nroff"
"""),

        ("txt", 1,
"""    type = "text"
"""),

        ("inp", 1,
"""    f = open(fname)
    try:
        l = f.readline()
        if l[0] == '*':
            type = "abaqus"
        else:
            for i in xrange(1, 500):
                if len(l) >= 19 and string.lower(l[:19]) == "header surface data":
                    type = "trasys"
                    break
                l = f.readline()
    except:
        pass
    f.close()
"""),

        ("asp", 1,
"""    type = "aspvbs"
    f = open(fname)
    try:
        l = f.readline()
        l = l + f.readline()
        l = l + f.readline()
    except:
        pass
    if string.find("perlscript", string.lower(l)) >= 0:
        type = "aspperl"
    f.close()
"""),

        ("cfg", 1,
"""    type = "cfg"
"""),
]

#
# The extra list of detected file types for case sensitive systems.
#
if os.name == "posix":
    # Rules in the text format accepted by ft_add_rules().  Only defined when
    # os.name is "posix"; __init__() loads it under the same condition.
    _case_detect_list = """
    suffix L lisp
    suffix C cpp
    suffix H cpp
    """

# List of _Ft_py objects: Python code executed to detect file type.
# Used first.
_py_list_before = []

# Dictionary used to map file name extension to file type.
_suffix_dict = {}

# List of _Ft_re objects; a match of the RE with the file name defines the file
# type.
_regexp_list = []

# List of _Ft_re objects: a match of the RE with the script in the first line
# of the file defines the file type.
_script_list = []

# List of _Ft_py objects: Python code executed to detect file type.
# Used after everything else didn't detect the type.
_py_list_after = []

# The detected file types are cached.  This assumes the file type doesn't
# change while executing recipes.  Would this ever be false?
# Index in the list is "ignore".
_cache_dict = [{}, {}]


# Dictionary of known filetypes (only the keys are important).
# Filled by __init__() and ft_declare(), queried by ft_known().
_filetype_dict = {}

# List of types from the builtin python scripts.
# Generated with the following shell command:
#
# grep 'type[[:space:]]*=[[:space:]]*"' Filetype.py | \
#  sed -e 's,""",,' | \
#  sed -e 's,[^"]*",,' -e 's,".*,,' | \
#  sort | uniq | \
#  sed -e 's,^,  ",' -e 's/$/",/'
#
# This is used to pre-populate _filetype_dict.  Update this list if the list of
# builtin Python detected-types changes.

_filetype_pre_list = [
# __init__() stores every name below as a key of _filetype_dict.
  "abaqus",
  "asm",
  "aspperl",
  "aspvbs",
  "automake",
  "baan",
  "basic",
  "cfg",
  "ch",
  "change",
  "cl",
  "config",
  "cvs",
  "cweb",
  "dcl",
  "diff",
  "diva",
  "dns",
  "dtd",
  "eiffel",
  "form",
  "ishd",
  "jargon",
  "lprolog",
  "m4",
  "mail",
  "mason",
  "master",
  "matlab",
  "mib",
  "mma",
  "modsim3",
  "moo",
  "nroff",
  "objc",
  "pascal",
  "perl",
  "php",
  "postscript",
  "progress",
  "prolog",
  "psf",
  "purifylog",
  "rcslog",
  "rebol",
  "registry",
  "rexx",
  "sendpr",
  "sgmldecl",
  "sh",
  "sicad",
  "sindaout",
  "smil",
  "snnsnet",
  "snnspat",
  "snnsres",
  "specman",
  "strace",
  "tads",
  "takout",
  "text",
  "trasys",
  "valgrind",
  "vb",
  "vim",
  "virata",
  "web",
  "winbatch",
  "xmath",
  "xml",
  "xpm",
  "xpm2",
  "xxd",
  "zsh",
# End of the list of grepped types.

# The remainder of these types is internal to AAP and cannot be detected.
  "libobject",
  "ltobject"
]

_did_init = 0       # non-zero when __init__() did its work

def __init__():
    """Load the built-in and the *.afd detection rules; only done once.

       The first call resets the module-level rule tables, fills them from
       the _def_* lists (plus _case_detect_list when os.name is "posix"),
       calls ft_check_dir() on the "afd" subdirectory of every directory
       returned by default_dirs({}) and finally stores every name of
       _filetype_pre_list as a key in _filetype_dict.
       Later calls return immediately."""
    global _suffix_dict, _regexp_list, _script_list
    global _py_list_before, _py_list_after
    global _did_init
    global _filetype_dict

    # this only needs to be done once
    if _did_init:
        return
    # The flag is set before the work is done.
    # NOTE(review): presumably this keeps the functions called below from
    # starting the initialisation again -- confirm.
    _did_init = 1

    _py_list_before = []
    _suffix_dict = {}
    _regexp_list = []
    _script_list = []
    _py_list_after = []
    _filetype_dict = {}

    # Load the built-in detection rules.
    _add_suffixlist(_def_suffix_list)
    _add_regexplist(_def_regexp_list)
    _add_scriptlist(_def_script_list)
    _add_pythonlist(_def_python_list)
    if os.name == "posix":
        ft_add_rules(_case_detect_list, 1)

    # Load detection rules from system and user *.afd files.
    for dirpath in default_dirs({}):
        ft_check_dir(os.path.join(dirpath, "afd"))

    # Declare all the filetypes known from the builtin Python bits
    # (the loop body was indented less than the "for" line, which is a
    # syntax error).
    for i in _filetype_pre_list:
        _filetype_dict[i] = 1

class DetectError(Exception):
    """Error for something gone wrong.
       str() of the exception gives the message passed as "args", or an empty
       string when no message was given."""
    def __init__(self, args = None):
        # Hand the message to Exception instead of assigning "self.args":
        # assigning a string there turns it into a tuple of characters (and
        # None is refused), which garbles str() of the exception.
        if args is None:
            Exception.__init__(self)
        else:
            Exception.__init__(self, args)

def ft_known(type):
    """Tell whether "type" is registered as a filetype.
       The default rules are loaded first when that was not done yet."""
    __init__()
    known = type in _filetype_dict
    return known

def ft_declare(type):
    """Declare "type" to be a known filetype.
       The default rules are loaded first when that was not done yet."""
    __init__()
    _filetype_dict[type] = 1

def ft_check_dir(dir, errmsg = 0, recdict = None):
    """Check directory "dir" for *.afd files and load them.
       When "errmsg" is non-zero give an error message when the directory
       doesn't exist."""
    if os.path.exists(dir) and os.path.isdir(dir):
        for f in glob.glob(os.path.join(dir, "*.afd")):
            try:
                ft_read_file(f, recdict)
            except DetectError, e:
                if _run_as_program:
                    print str(e)
                else:
                    from Message import msg_error
                    msg_error(recdict, str(e))
    elif errmsg:
        e = _('Directory does not exist: "%s"') % dir
        if _run_as_program:
            print e
        else:
            from Message import msg_error
            msg_error(recdict, e)


def ft_read_file(fname, recdict = None):
    """Read file "fname" for file type detection rules."""
    try:
        fd = open(fname)
    except IOError, e:
        raise DetectError, (_('Cannot open "%s": ') % fname) + str(e)
    try:
        s = fd.read()
    except IOError, e:
        raise DetectError, (_('Cannot read "%s": ') % fname) + str(e)
    fd.close()

    ft_add_rules(s, 1, recdict)


def ft_add_rules(dtstr, recipe_line_nr, recdict = None):
    """Add file type detection rules from string "dtstr".
       "recipe_line_nr" is the first line number in a recipe, zero when not
       reading a recipe."""
    # Always load the default rules first (skipped when done already).
    __init__()

    # Split the string into individual lines.
    lines = string.split(dtstr, '\n')

    # Loop over all the lines (may use more than one for python items).
    # Note: using skip_white() and skip_to_white() is avoided here for speed.
    line_idx = 0
    line_count = len(lines)
    while line_idx < line_count:
        line = lines[line_idx]

        # isolate first word: type of detection.
        items = string.split(line, None, 1)

        # ignore empty and comment lines
        if len(items) < 1 or items[0][0] == '#':
            line_idx = line_idx + 1
            continue

        itype = items[0]
        if len(items) < 2:
            rline = ''
        else:
            rline = items[1]
        rline_len = len(rline)
        # isolate first argument, which may be in quotes
        astart = 0
        if astart < rline_len:
            if rline[astart] == '"' or rline[astart] == "'":
                quote = rline[astart]
                astart = astart + 1
                aend = astart
                while aend < rline_len and rline[aend] != quote:
                    aend = aend + 1
                if aend == rline_len:
                    raise DetectError, (_('Missing quote in line %d: "%s"')
                                           % (line_idx + recipe_line_nr, line))
                n = aend + 1
            else:
                aend = astart
                while aend < rline_len and rline[aend] != ' ' and rline[aend] != '\t':
                    aend = aend + 1
                n = aend
            arg1 = rline[astart:aend]
        else:
            arg1 = ''
            n = rline_len

        # Isolate further arguments (no quotes!).
        # A superfluous argument is silently ignored (could be a comment).
        args = string.split(rline[n:])
        if len(args) >= 1:
            arg2 = args[0]
        else:
            arg2 = ''
        if len(args) >= 2:
            arg3 = args[1]
        else:
            arg3 = ''
        if len(args) >= 3:
            arg4 = args[2]
        else:
            arg4 = ''

        if ((itype in ["suffix", "regexp", "script"] and not arg2)
                or (itype == "declare" and not arg1)):
            raise DetectError, (_('Missing argument in line %d: "%s"')
                                           % (line_idx + recipe_line_nr, line))

  # Just declare a filetype
  if itype == "declare":
      _filetype_dict[arg1] = 1

  # Filetype file file suffix
        elif itype == "suffix":
            _add_suffix(arg1, arg2)

  # Filetype based on a regex match of the filename
        elif itype == "regexp":
            _add_regexp(arg1, arg2, arg3 == "tail" or arg4 == "tail",
                                          arg3 == "append" or arg4 == "append")

  # Filetype based on checking the #! line for an interpreter
        elif itype == "script":
            _add_script(arg1, arg2, arg3 and arg3 == "append")

  # Magic python-based filetype detection
        elif itype == "python":
            append = 0
            after = 0
            suffix = None
            for arg in [arg1, arg2, arg3]:
                if arg:
                    if arg == "append":
                        append = 1
                    elif arg == "after":
                        after = 1
                    elif not suffix:
                        suffix = arg
                    else:
                        raise DetectError, (
                                _('Illegal argument in line %d: "%s"')
                                           % (line_idx + recipe_line_nr, line))

            start_indent = get_indent(line)
            line_idx = line_idx + 1
            start_line_idx = line_idx + recipe_line_nr
            cmds = ""
            while line_idx < line_count:
                line = lines[line_idx]
                if get_indent(line) <= start_indent:
                    # Ignore empty and comment lines.
                    i = skip_white(line, 0)
                    if i < len(line) and line[i] != '#':
                        line_idx = line_idx - 1 # this line has next item
                        break
                cmds = cmds + line + '\n'
                line_idx = line_idx + 1
            if not cmds:
                    raise DetectError, (_('Python commands missing in line %d')
                                                 % (line_idx + recipe_line_nr))
            _add_python(cmds, _("filetype detection; python code at line %d: ")
                                       % start_line_idx, after, append, suffix)

        else:
            raise DetectError, (
                       _('Illegal item "%s" in argument to ft_add_rules(): %s')
                                                               % (itype, line))

        line_idx = line_idx + 1


class _Ft_re:
    """Class used to store pairs of RE and file type."""
    def __init__(self, regexp, type, tail):
        self.re = regexp
        self.type = type
        self.tail = tail        # match tail of filename
        self.cre = None

    def comp(self):
        """Get the compiled regexp, cache the result."""
        try:
            self.cre = re.compile(self.re)
        except StandardError, e:
            raise DetectError, (_('Error in filetype detection regexp "%s": ')
                                                            % self.re) + str(e)


class _Ft_py:
    """Class used to store Python code for detecting a file type."""
    def __init__(self, code, suffix, error_msg):
        self.code = code                # the Python code as a string
        self.ccode = None               # the compiled Python code
        self.suffix = suffix            # the list of required suffixes or None
        self.error_msg = error_msg      # a message used for errors

    def compile(self):
        if not self.ccode:
            # DEBUG
            # print "compling for suffix: ", self.suffix
            # print "compiling code: ", self.code

            # Prepend "if 1:" to get the indenting right.
            if self.code[0] == ' ' or self.code[0] == '\t':
                tcode = "if 1:\n" + self.code 
            else:
                tcode = self.code 
            try:
                self.ccode = compile(tcode, 'filetype detection rules', 'exec')
            except StandardError, e:
                raise DetectError, (_('Error in Python code (%s): ')
                                                     % self.error_msg) + str(e)

def _add_suffix(suf, type):
    """Detect "type" from the file name extension "suf".
       A "type" of "ignore" means the suffix is to be removed and detection
       continues with what remains.
       A "type" of "remove" drops an existing rule for "suf"; it is not an
       error when there is none."""
    if type != 'remove':
        _suffix_dict[suf] = type
        _filetype_dict[type] = 1
        return

    if suf in _suffix_dict:
        del _suffix_dict[suf]


def _add_suffixlist(list):
    """Add suffix rules from a list of suffix-type tuples.
       Every type in the list is also declared as a known filetype."""
    for suf, itype in list:
        _suffix_dict[suf] = itype
        _filetype_dict[itype] = 1


def _add_regexp(regexp, type, tail, append):
    """Add detection of "type" by matching the file name with Python regular
       expression "regexp".
       When "tail" is non-zero the rule is marked to match the tail of the
       file name.
       When append is non-zero, add to the end of the regexp rules, otherwise
       put the rule before the existing ones.
       When "type" is "remove" all existing rules for "regexp" are removed."""
    if type == 'remove':
        # Rebuild the list in place.  Removing items while looping over the
        # list skips the item following each removed one.
        _regexp_list[:] = [r for r in _regexp_list if r.re != regexp]
    else:
        f = _Ft_re(regexp, type, tail)
        if append:
            _regexp_list.append(f)
        else:
            _regexp_list.insert(0, f)
        _filetype_dict[type] = 1


def _add_regexplist(list):
    """Add a regexp rule for every (regexp, type, tail) tuple in "list"."""
    for pattern, ftype, match_tail in list:
        # Not appending: the rule is put before the ones added earlier.
        _add_regexp(pattern, ftype, match_tail, 0)


def _add_script(regexp, type, append):
    """Add detection of "type" by matching the script name in the first line of
       the file with Python regular expression "regexp".
       When append is non-zero, add to the end of the script rules, otherwise
       put the rule before the existing ones.
       When "type" is "remove" all existing rules for "regexp" are removed."""
    if type == 'remove':
        # Rebuild the list in place.  Removing items while looping over the
        # list skips the item following each removed one.
        _script_list[:] = [r for r in _script_list if r.re != regexp]
    else:
        f = _Ft_re(regexp, type, 0)
        _filetype_dict[type] = 1
        if append:
            _script_list.append(f)
        else:
            _script_list.insert(0, f)


def _add_scriptlist(list):
    """Add a script rule for every (scriptname regexp, type) tuple in
       "list"."""
    for pattern, ftype in list:
        # Not appending: the rule is put before the ones added earlier.
        _add_script(pattern, ftype, 0)


def _add_python(code, error_msg, after, append, suffix):
    """Add a rule that detects the file type with Python code "code".
       Each line in "code" must end in a '\n'.
       "error_msg" is stored with the rule, for use in error messages.
       A non-zero "after" puts the rule in the list that is used after the
       suffix, regexp and script rules, otherwise it goes in the list that is
       used before them.
       A non-zero "append" adds the rule at the end of that list, otherwise it
       is put at the start.
       "suffix" is a comma separated list of suffixes or empty."""
    suffixes = []
    if suffix:
        suffixes = string.split(suffix, ',')
    rule = _Ft_py(code, suffixes, error_msg)

    if after:
        target = _py_list_after
    else:
        target = _py_list_before

    if not append:
        target.insert(0, rule)
        return
    target.append(rule)


def _add_pythonlist(list):
    """Add a python rule for every (suffix, after, script) tuple in "list".
       All the rules get the same "default rule" text for error messages."""
    label = _("default rule")
    for suffixes, use_after, source in list:
        # Not appending: the rule is put before the ones added earlier.
        _add_python(source, label, use_after, 0, suffixes)



def _exec_py(fname, item, ignore):
    """Execute the code defined with _add_python().
       "item" is the rule object: its compile() method is called and its
       "ccode" is executed with "exec_recdict" (defined outside of this
       function) as both the global and the local namespace.
       The code can use "fname", "fname_base", "ft_detect" and "ignore" and
       reports the detected type by assigning it to "type".
       Returns the value of "type", None when the code didn't set it.
       An IOError in the code is ignored, another StandardError is turned
       into a DetectError."""
    # "exec_recdict" is used again for every call: set the items for this
    # file and delete a "type" that was left behind by an earlier rule.
    exec_recdict["fname"] = fname
    exec_recdict["fname_base"] = os.path.basename(fname)
    exec_recdict["ft_detect"] = ft_detect
    exec_recdict["ignore"] = ignore
    if exec_recdict.has_key("type"):
        del exec_recdict["type"]

    item.compile()
    try:
        exec item.ccode in exec_recdict, exec_recdict
    except IOError, e:
        pass        # ignore errors for reading the file
    except StandardError, e:
        raise DetectError, _(item.error_msg) + str(e)

    # A "type" that was set before an ignored IOError is still returned.
    if exec_recdict.has_key("type"):
        return exec_recdict["type"]
    return None


def ft_detect(fname, ignore = 0, recdict = None):
    """Detect the file type for file "fname".
       Returns the type as a string or None.
       The checks are done in this order: the cache, a directory, the early
       python rules, the suffixes of the file name, the regexp rules, the
       script rules for the "#!" line and finally the late python rules.
       When a suffix has the type "ignore" and "ignore" is non-zero, the
       suffix is removed and the remaining name is used for detection.  When
       "ignore" is zero "ignore" is returned.
       "recdict" is only passed on to the recursive call.
       The result is cached, separately for each value of "ignore".
       A DetectError can be raised for an error in a regexp or in the Python
       code of a rule."""
    # return quickly when already detected before
    # NOTE(review): the lookup uses the name as it was passed, while results
    # are stored below under the name returned by fname_fold().  Presumably
    # these only differ on non-Posix systems; confirm whether the lookup
    # should use the folded name too.
    if fname in _cache_dict[ignore]:
        return _cache_dict[ignore][fname]

    if os.path.isdir(fname):
        _cache_dict[ignore][fname] = "directory"
        return "directory"

    # Internationalisation inits: setlocale and gettext.
    i18n_init()

    # Initialize (will skip when done already)
    __init__()

    # On non-Posix systems we ignore case differences by making the name lower
    # case.
    fname = fname_fold(fname)

    # Do the early python code checks.  May first check if the suffix matches.
    # NOTE(review): the suffix is taken from the whole path here, while the
    # late python checks below take it from the basename; confirm that the
    # difference is intended.
    i = string.rfind(fname, ".")
    if i > 0:
        suffix = fname[i + 1:]
    else:
        suffix = ''
    for p in _py_list_before:
        if not p.suffix or suffix in p.suffix:
            atype = _exec_py(fname, p, ignore)
            if atype:
                _cache_dict[ignore][fname] = atype
                return atype

    # Try the extension, this is fastest.
    # When "fname" has several extensions, try with all of them first, then
    # try by removing the first ones:  "f.html.c": "html.c" then ".c".
    bn = os.path.basename(fname)
    i = string.find(bn, ".")
    while i > 0 and i + 1 < len(bn):
        # Found a dot that's not the first or last character.
        if bn[i + 1:] in _suffix_dict:
            ft = _suffix_dict[bn[i + 1:]]
            if ft == "ignore" and ignore:
                # remove an ignored extension and detect with that
                ft = ft_detect(fname[:-(len(bn[i:]))], 1, recdict)
            _cache_dict[ignore][fname] = ft
            return ft
        i = string.find(bn, ".", i + 1)

    # match all defined REs with the file name.
    # TODO: handle "/" in RE and fname.
    for r in _regexp_list:
        if not r.cre:
            r.comp()
        if r.tail:
            if r.cre.match(bn):
                _cache_dict[ignore][fname] = r.type
                return r.type
        else:
            if r.cre.match(fname):
                _cache_dict[ignore][fname] = r.type
                return r.type

    # match all defined REs with the script name in the first line of the
    # file.
    try:
        f = open(fname)
        try:
            line = f.readline()
        finally:
            # Also close the file when reading the line fails.
            f.close()
    except EnvironmentError:
        # Errors for files that can't be read are ignored.
        pass
    else:
        if len(line) > 2 and line[:2] == "#!":
            # TODO: remove "env VAR=val" and script arguments from line
            text = line[2:]
            for r in _script_list:
                if not r.cre:
                    r.comp()
                if r.cre.match(text):
                    _cache_dict[ignore][fname] = r.type
                    return r.type

    # Do the python code checks.  May first check if the suffix matches.
    i = string.rfind(bn, ".")
    if i > 0:
        suffix = bn[i + 1:]
    else:
        suffix = ''
    for p in _py_list_after:
        if not p.suffix or suffix in p.suffix:
            atype = _exec_py(fname, p, ignore)
            if atype:
                _cache_dict[ignore][fname] = atype
                return atype

    # Nothing matched; remember that too.
    _cache_dict[ignore][fname] = None
    return None


def filetype_root(ft):
    """When "ft" contains an underscore, return the part before the first
       underscore.  This is the basic filetype for user-defined filetypes.
       Return None otherwise, also when "ft" starts with an underscore."""
    # Use the string method; the functions of the string module are
    # deprecated.
    i = ft.find('_')
    if i > 0:
        return ft[:i]
    return None


# When executed as a program, detect the type of the specified file.
if __name__ == '__main__':

    # Internationalisation inits: setlocale and gettext.
    i18n_init()

    items = []
    checkfile = None
    _run_as_program = 1

    # Check for any "-Idir", "-I dir", "-ffile" and "-f file" arguments.
    next_is_dir = 0
    next_is_file = 0
    for arg in sys.argv[1:]:
        if next_is_dir:
            items.extend({"dir" : arg})
            next_is_dir = 0
        elif next_is_file:
            items.extend({"file" : arg})
            next_is_file = 0
        elif len(arg) >= 2 and arg[:2] == "-I":
            if len(arg) > 2:
                items.extend({"dir" : arg[2:]})
            else:
                next_is_dir = 1
        elif len(arg) >= 2 and arg[:2] == "-f":
            if len(arg) > 2:
                items.extend({"file" : arg[2:]})
            else:
                next_is_file = 1
        else:
            if checkfile:
                print _("Can only check one file")
                sys.exit(1)
            checkfile = arg

    if next_is_dir:
        print _("-I argument must be followed by a directory name")
        sys.exit(1)
    if next_is_file:
        print _("-f argument must be followed by a file name")
        sys.exit(1)

    if not checkfile:
        print _("Usage: %s [-I ruledir] [-f rulefile] filename") % sys.argv[0]
        sys.exit(1)

    # load the built-in default rules
    __init__()

    # Check specified directories for *.afd files and read specified files.
    for item in items:
        if item.has_key("dir"):
            ft_check_dir(item["dir"])
        else:
            try:
                ft_read_file(item["file"])
            except DetectError, e:
                print e

    try:
        type = ft_detect(sys.argv[1])
        if type == "ignore":
            print ft_detect(sys.argv[1], 1), "(ignored suffix)"
        else:
            print ft_detect(sys.argv[1])
    except DetectError, e:
        sys.stderr.write("Detection error: " + str(e))


# vim: set sw=4 et sts=4 tw=79 fo+=l:
# www.java2java.com | Contact Us
# Copyright 2009 - 12 Demo Source and Support. All rights reserved.
# All other trademarks are property of their respective owners.