default.py :  » Blog » Frog » FrogComplete-1.8 » webapps » frog » frog » text » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Blog » Frog 
Frog » FrogComplete 1.8 » webapps » frog » frog » text » default.py
#
#   Frog's default markup processor.
#
#   (note: called through the frog.util package, not directly)
#

import sre, urllib, urlparse, cgi, mimetypes, os, math
from frog.text.errors import MarkupSyntaxError

__all__=["contentcleanup","content2html"]


#   Called to 'clean up' entered content, before it is stored in the data store
def contentcleanup(text, environment={}):
    """Return text with surrounding whitespace removed, or None when text is empty.

    'environment' is accepted for interface compatibility but is not used here.
    """
    if not text:
        return None
    return text.strip()


# Matches a run of two or more line breaks (each "\n" or "\r\n").
newlineRE = sre.compile(r'(?:\r?\n){2,}')
# Tokenizer: every match is some (possibly empty) text followed by one [tag].
#   group 1: the text in front of the tag
#   group 2: everything between the square brackets
#   group 3: the text after a '=' inside the tag (None when there is no '=')
# DOTALL lets the leading text span multiple lines.
tokenizeRE = sre.compile(r'(.*?)\[(/?[^[]*?(?:=([^[]*))?)\]', sre.DOTALL)

# Tag name that content2html appends (in brackets) to the input; it is the
# default close tag of Parser.parse.
SENTINEL = "/sentinel"


#
# The default markup text parser.
# env *must* contain:
#  "urlprefix" (the url prefix for links) 
#  "userid" (the current blog user id)
#  "filepath" (the root path where statically linked files are stored)
#  "smileys" (the smiley color index if smiley images should be used, or None to disable them;
#             content2html translates it into the "smileycolorstr" key that the parser reads)
#
# Not called externally, the content2html function is used instead.
#
class Parser:
    """The default markup parser: turns tokenizer matches into an HTML string.

    Not meant to be called externally; content2html is the entry point.

    env serves both as configuration and as scratch space for the %-formatted
    HTML templates. Keys read by this class:
      "urlprefix"      - url prefix for generated links and images
      "userid"         - blog user id used in article and file links
      "filepath"       - root directory in which linked files are looked up
      "smileycolorstr" - name of the smiley image set; a false or missing
                         value disables smiley replacement
    Keys written while parsing: "articlelink", "file", "short", "netloc",
    "icon", "sizekb" and "attrs". Everything written there that originates
    from the markup is HTML-escaped first.

    comment: when true, [url] links get rel="nofollow" (anti-spam measure).
    """

    # (text to find, alt text, image file) - applied in exactly this order.
    # The text is searched *after* HTML escaping, hence the entities. Some alt
    # texts deliberately differ from the search text (no '-') so that a later
    # entry cannot match inside an image tag that was already inserted.
    SMILEYS = (
        ("&gt;-(", "&gt;(", "evil.gif"),
        ("&gt;-)", "&gt;)", "twisted.gif"),
        ("(&gt;)", "(&gt;)", "arrow.gif"),
        ("(&lt;)", "(&lt;)", "arrowl.gif"),
        ("(?)", "(?)", "question.gif"),
        ("(!)", "(!)", "exclaim.gif"),
        ("(L)", "(L)", "idea.gif"),
        (":-)", ":-)", "smile.gif"),
        (":-D", ":-D", "biggrin.gif"),
        ("^_^", "^_^", "cheesygrin.gif"),
        (":-/", ":-/", "sad.gif"),
        (":-((", ":((", "cry.gif"),
        (":-(", ":-(", "frown.gif"),
        (":-S", ":-S", "confused.gif"),
        (";-)", ";-)", "wink.gif"),
        ("8-)", "8-)", "cool.gif"),
        (":-P", ":-P", "tongue.gif"),
        ("o_o", "o_o", "eek.gif"),
        (":-|", ":-|", "neutral.gif"),
        (":-&gt;", ":-&gt;", "razz.gif"),
        (":-#", ":-#", "redface.gif"),
        ("%-|", "%-|", "rolleyes.gif"),
        (":-O", ":-O", "surprised.gif"),
    )

    # Link templates shared by the '@@' (forced download) and '@' (smart) tags.
    ARTICLE_LINK = '<a href="%(urlprefix)suser/%(userid)s/article/%(articlelink)s"><img alt="remote" src="%(urlprefix)simg/article.png" style="padding-bottom:0.5ex" />other article</a>'
    LOCAL_DOWNLOAD_LINK = '<a href="%(urlprefix)sfiles/%(userid)s/%(file)s" title="Download file, %(sizekb)d Kb" rel="nofollow"><img alt="download" src="%(urlprefix)simg/%(icon)s" style="padding-bottom:0.5ex" />%(short)s</a>'
    REMOTE_DOWNLOAD_LINK = '<a href="%(file)s" title="Download file from %(netloc)s" rel="nofollow"><img alt="remote" src="%(urlprefix)simg/remote.gif" style="padding-bottom:0.5ex" /><img alt="download" src="%(urlprefix)simg/%(icon)s" style="padding-bottom:0.5ex" />%(short)s</a>'
    BAD_LINK = '[[!!! BAD LINK: %(file)s !!!]]'

    def __init__(self, env=None, comment=False):
        # The parser writes scratch keys into env, so a shared default dict
        # would leak state between unrelated parsers: create one per instance.
        if env is None:
            env={}
        self.parsetree=[]
        self.inparagraph=False
        self.opentags=self.closetags=0
        self.environ=env
        self.comment=comment
        self.initialParagraph()

    def initialParagraph(self):
        """Hook called once from __init__; subclasses override it to start without <p>."""
        self.openParagraph()  # normally, parsed blocks start with a new paragraph.

    def parse(self, matches, closetag=SENTINEL):
        """Consume matches up to and including closetag and return the HTML.

        matches:  iterator of tokenizeRE match objects; nested blocks consume
                  from the same iterator.
        closetag: tag that ends this block.
        Raises MarkupSyntaxError when the number of opening and closing
        b/i/tt/u/center tags differs, or when a helper detects bad markup.
        """
        for m in matches:
            txt,tag,extra=m.group(1,2,3)   # 'extra' is the text after a '=' in the tag (with url, for instance)
            if txt:
                txt=self.processText(txt)
                if txt:
                    self.parsetree.append( txt )
            if tag in ('b', 'i', 'tt'):
                self.parsetree.append("<%s>" % tag)
                self.opentags+=1
            elif tag in ('/b', '/i', '/tt'):
                self.parsetree.append("<%s>" % tag)
                self.closetags+=1
            elif tag=='u':
                self.parsetree.append('<span class="underlined">')
                self.opentags+=1
            elif tag=='/u':
                self.parsetree.append('</span>')
                self.closetags+=1
            elif tag=='center':
                self.closeParagraph(False)  # XXX a bit of a hack
                self.parsetree.append('<div style="text-align:center">')
                self.opentags+=1
            elif tag=='/center':
                self.parsetree.append('</div>')
                self.closetags+=1
                self.openParagraph(False)  # XXX a bit of a hack
            elif tag=='/':
                self.parsetree.append("<br />")      # line break
            elif tag.startswith("@:"):
                self.parsetree.append( self._parseArticleLink(tag) )
            elif tag.startswith("@@"):
                self.parsetree.append( self._parseDownloadLink(tag) )
            elif tag.startswith("@"):
                self.parsetree.append( self._parseSmartLink(tag) )
            elif tag.startswith("url"):
                self.parsetree.append( self.parseURL(tag,extra,matches) )
            elif tag.startswith("img"):
                self.parsetree.append( self.parseIMG(tag, matches) )
            elif tag=="quote":
                self._parseBlock(Quote(self.environ), matches, "/quote")
            elif tag=="code":
                self._parseBlock(Code(self.environ), matches, "/code")
            elif tag.startswith("list"):
                self._parseBlock(List(extra,self.environ), matches, "/list")
            elif tag==closetag:
                break
            else:
                self.processTag(tag, extra, matches)
        self.closeParagraph()
        if self.closetags!=self.opentags:
            raise MarkupSyntaxError("close tags don't match open tags")

        return self.makeHTML(self.parsetree)

    def _parseBlock(self, child, matches, closetag):
        """Let a nested parser consume its block and append the result as a separate block."""
        # Nested blocks must inherit the comment flag, otherwise links inside a
        # quote or list in a comment would lose their rel="nofollow".
        child.comment=self.comment
        self.closeParagraph()
        self.parsetree.append( child.parse(matches,closetag) )
        self.openParagraph()

    def _parseArticleLink(self, tag):
        """Build the link for an '@:article' cross-link tag. The target is not checked!"""
        self.environ["articlelink"]=cgi.escape(tag[2:],True)
        return self.ARTICLE_LINK % self.environ

    def _parseDownloadLink(self, tag):
        """Build the link for an '@@file' tag: always a download link, never embedded."""
        filename=tag[2:].strip()
        self.environ["file"]=cgi.escape(filename,True)
        mime=mimetypes.guess_type(filename)[0] or ""
        stats=self.fileStats(filename)
        if stats:
            self._describeLocalFile(filename, mime, stats)
            return self.LOCAL_DOWNLOAD_LINK % self.environ
        elif self._isExternalLink(filename):
            self._describeRemoteFile(filename)
            return self._remoteDownloadLink(mime)
        else:
            return self.BAD_LINK % self.environ

    def _parseSmartLink(self, tag):
        """Build the HTML for an '@file' tag: images are embedded, other files linked.

        The file name may be followed by '|' and comma separated attributes
        (see parseCustomAttrs).
        """
        filename, attrs = self.parseCustomAttrs(tag[1:].strip())
        self.environ["file"]=cgi.escape(filename,True)
        mime=mimetypes.guess_type(filename)[0] or ""
        stats=self.fileStats(filename)
        if stats:
            short=self._describeLocalFile(filename, mime, stats)
            if "alt" not in attrs:
                attrs["alt"] = "[[image: %s]]" % short
            self.environ["attrs"]=self._formatAttrs(attrs)
            if mime.startswith("image/"):
                return '<img src="%(urlprefix)sfiles/%(userid)s/%(file)s" %(attrs)s />' % self.environ
            else:
                return self.LOCAL_DOWNLOAD_LINK % self.environ
        elif self._isExternalLink(filename):
            short, netloc = self._describeRemoteFile(filename)
            if mime.startswith("image/"):
                if "alt" not in attrs:
                    attrs["alt"] = "[[image: %s from %s]]" % (short, netloc)
                self.environ["attrs"]=self._formatAttrs(attrs)
                return '<img src="%(file)s" %(attrs)s />' % self.environ
            else:
                return self._remoteDownloadLink(mime)
        else:
            return self.BAD_LINK % self.environ

    def _isExternalLink(self, filename):
        """True when filename looks like an absolute http or https url."""
        lowered=filename.lower()
        return lowered.startswith("http:") or lowered.startswith("https:")

    def _describeLocalFile(self, filename, mime, stats):
        """Store icon, size and escaped short name of a local file; return the raw short name."""
        short=os.path.split(filename)[1]
        self.environ["icon"]=self.getMimeIcon(mime)
        self.environ["sizekb"]=int(math.ceil(stats.st_size / 1024.0))
        self.environ["short"]=cgi.escape(short,True)
        return short

    def _describeRemoteFile(self, filename):
        """Store escaped short name and host of a remote url; return the raw (short, netloc)."""
        scheme,netloc,path,query,fragment = urlparse.urlsplit(filename)
        short=os.path.split( path ) [1] or filename
        self.environ["short"]=cgi.escape(short,True)
        self.environ["netloc"]=cgi.escape(netloc,True)
        return short, netloc

    def _remoteDownloadLink(self, mime):
        """Build the download link for the remote file described in self.environ."""
        self.environ["icon"]=self.getMimeIcon(mime)
        return self.REMOTE_DOWNLOAD_LINK % self.environ

    def _formatAttrs(self, attrs):
        """Render a dict as HTML attributes; the values are user supplied, so escape them."""
        return ' '.join( [ '%s="%s"' % (a,cgi.escape(v,True)) for a,v in attrs.items() ] )

    def parseCustomAttrs(self, txt):
        """Split 'name|w=..,h=..,alt=..,float=..' into (name, attribute dict).

        Text without a '|' is returned unchanged with an empty dict.
        Raises MarkupSyntaxError for an unknown or empty attribute.
        """
        attrs={}
        values=txt.split('|',1)
        if len(values)==2:
            txt=values[0]
            values=values[1].split(',')
            for v in values:
                if v.startswith("w=") and len(v)>2:
                    attrs["width"]=v[2:]
                elif v.startswith("h=") and len(v)>2:
                    attrs["height"]=v[2:]
                elif v.startswith("alt=") and len(v)>4:
                    attrs["alt"]=v[4:]
                elif v.startswith("float=") and len(v)>6:
                    attrs["style"]="float: "+v[6:]
                else:
                    raise MarkupSyntaxError("invalid attr: "+v)
        return txt,attrs

    def getMimeIcon(self, mime):
        """Return the icon file name for a mime type string ('' selects the generic icon)."""
        if mime.startswith("image"): return "picture.gif"
        elif mime.startswith("text"): return "text.gif"
        elif mime.startswith("audio"): return "sound.gif"
        elif mime.startswith("video"): return "movie.gif"
        elif mime.startswith("application/x-sh"): return "script.gif"
        else:  return "disk.gif"

    def openParagraph(self, checkFlag=True):
        """Emit <p>; with checkFlag only when no paragraph is currently open."""
        if not checkFlag or not self.inparagraph:
            self.parsetree.append( "<p>")
            self.inparagraph=True

    def closeParagraph(self, clearFlag=True):
        """Close the open paragraph, if any; clearFlag=False leaves it marked as open."""
        if self.inparagraph:
            if self.parsetree[-1]=="<p>":
                del self.parsetree[-1]  # avoid empty paragraphs <p></p>
            else:
                self.parsetree.append("</p>")
            if clearFlag:
                self.inparagraph=False

    def processTag(self, tag, extra, matches):
        """Handle a tag that parse() does not know: emit it as escaped literal text."""
        self.parsetree.append( '['+cgi.escape(tag,True)+']' )    # tags must not be processed
        # don't raise an error: raise MarkupSyntaxError("unknown or unmatched tag '%s'" % tag)

    def processText(self, txt):
        """Escape plain text, insert smileys and turn blank lines into paragraph breaks."""
        # don't strip spaces because otherwise certain substrings will be joined together
        txt=self.processSmileys( cgi.escape(txt,True) )
        txt=newlineRE.sub('</p>\n<p>', txt)
        if not self.inparagraph:
            self.inparagraph=True
            txt="<p>"+txt
        return txt

    def processSmileys(self,txt):
        """Replace smiley character sequences in already-escaped text by <img> tags."""
        try:
            color=self.environ["smileycolorstr"]
        except KeyError:
            color=None      # no smiley set configured: leave the text alone
        if color:
            urlprefix=self.environ["urlprefix"]
            for pattern,alt,image in self.SMILEYS:
                txt=txt.replace(pattern, '<img alt="%s" class="smiley" src="%simg/smileys_%s/%s" />' % (alt, urlprefix, color, image))
        return txt

    def makeHTML(self, parsetree):
        """Join the collected fragments; subclasses wrap the result in their own element."""
        return "".join(parsetree)

    def fileStats(self, filename):
        """Return os.stat() of filename below env["filepath"], or None when that fails."""
        # NOTE(review): filename comes straight from the markup and is joined
        # unchecked; an absolute path or '..' components can point outside
        # "filepath". Confirm whether that needs to be restricted.
        try:
            return os.stat(os.path.join(self.environ["filepath"], filename))
        except OSError:
            return None

    def parseURL(self, tag, href, matches):
        """Build the <a> element for [url]text[/url] or [url=href]text[/url].

        Consumes the next match, which must end with the /url close tag,
        otherwise MarkupSyntaxError is raised. A href without a scheme that
        does not start with '/' is turned into an http:// url.
        """
        txt, closetag = matches.next().group(1,2)
        if closetag!="/url":
            raise MarkupSyntaxError("url close tag missing")
        if not href:
            href=txt
        prot, ref = urllib.splittype(href)
        if not prot and not ref.startswith('/'):
            prot="http"
            ref="//"+ref
            href="%s:%s" % (prot,ref)
        # NOTE(review): any scheme is accepted here, including 'javascript:'.
        href, txt = cgi.escape(href.strip(),True), cgi.escape(txt.strip(),True)
        if self.comment:
            # anti-spam measure; add rel="nofollow" to the link in comments
            # for more info: http://www.google.com/googleblog/2005/01/preventing-comment-spam.html
            return '<a href="%s" rel="nofollow">%s</a>' % (href, txt)
        else:
            return '<a href="%s">%s</a>' % (href,txt)

    def parseIMG(self, tag, matches):
        """Build the <img> element for [img]location[/img], with optional '|' attributes.

        Consumes the next match, which must end with the /img close tag,
        otherwise MarkupSyntaxError is raised.
        """
        tag, attrs = self.parseCustomAttrs(tag)
        location, closetag = matches.next().group(1,2)
        if closetag!="/img":
            raise MarkupSyntaxError("img close tag missing")
        scheme,netloc,path,query,fragment = urlparse.urlsplit(location)
        # fall back to the whole location when the url path has no file name part
        short=os.path.split( path ) [1] or location
        if "alt" not in attrs:
            attrs["alt"] = "[[image %s from %s]]" % (short, netloc)
        return '<img src="%s" %s />' % (cgi.escape(location,True), self._formatAttrs(attrs))


class Quote(Parser):
    """Parser for the contents of a [quote] block; the result is a <blockquote>."""

    def __init__(self, env={}):
        Parser.__init__(self, env)

    def makeHTML(self, parsetree):
        # Make sure no paragraph is left open before the fragments are joined.
        self.closeParagraph()
        quoted = "".join(parsetree)
        return "<blockquote>%s</blockquote>" % quoted.strip()

class List(Parser):
    """Parser for the contents of a [list] block.

    listtype selects the element: None gives <ul>, '1' a decimal <ol> and
    'a' a lower-alpha <ol>. Anything else makes makeHTML raise
    MarkupSyntaxError. Items are introduced by the [*] tag.
    """

    def __init__(self, listtype, env={}):
        Parser.__init__(self, env)
        self.type = listtype
        self.in_listitem = False

    def initialParagraph(self):
        # A list does not start with a paragraph.
        pass

    def processText(self, txt):
        # Overridden because there are no paragraphs inside a list: blank
        # lines become line breaks instead.
        # Spaces are not stripped, otherwise certain substrings would be joined together.
        escaped = cgi.escape(txt, True)
        return newlineRE.sub('<br />', self.processSmileys(escaped))

    def processTag(self, tag, extra, matches):
        if tag != '*':
            Parser.processTag(self, tag, extra, matches)
            return
        # A new item implicitly closes the one that is still open.
        if self.in_listitem:
            self.parsetree.append("</li>\n")
        self.parsetree.append("<li>")
        self.in_listitem = True

    def makeHTML(self, parsetree):
        if self.in_listitem:
            self.parsetree.append("</li>\n")
        if self.type is None:
            opener = closer = "ul"
        else:
            if self.type == '1':
                numbering = "decimal"
            elif self.type == 'a':
                numbering = "lower-alpha"
            else:
                raise MarkupSyntaxError("invalid list type '%s'" % self.type)
            opener = 'ol style="list-style-type: %s;"' % numbering
            closer = "ol"
        items = "".join(parsetree).strip()
        return "<%s>%s</%s>" % (opener, items, closer)

class Code(Parser):
    """Parser for the contents of a [code] block; the result is a <pre> element.

    Nothing inside the block is interpreted: text and tags alike are escaped
    and passed through literally until the close tag is found.
    """

    def initialParagraph(self):
        # A code block does not start with a paragraph.
        pass

    def makeHTML(self, parsetree):
        verbatim = "".join(parsetree)
        return "<pre>%s</pre>" % verbatim.strip()

    def parse(self, matches, closetag="/code"):
        pieces = self.parsetree
        for match in matches:
            literal = match.group(1)
            name = match.group(2)
            if literal:
                pieces.append(cgi.escape(literal, True))
            if name == closetag:
                break
            # tags must not be processed: put them back as escaped text
            pieces.append("[%s]" % cgi.escape(name, True))
        return self.makeHTML(pieces)


# Main entry function:

def content2html(text,env=None,comment=False):
    """Convert markup text to HTML with the default Parser.

    text:    the markup text to convert.
    env:     parser environment (see Parser). Its "smileys" entry is the
             smiley color index (0, 1 or 2) or None; it is translated into the
             "smileycolorstr" entry, which is stored in env. A missing
             "smileys" entry means no smileys.
    comment: passed on to Parser; true when the text is a comment.
    Returns the HTML string. Markup errors raise MarkupSyntaxError.
    """
    if env is None:
        env={}      # fresh dict per call: env is written to below and by the parser
    # The sentinel tag guarantees that trailing text ends up in a match too.
    matches=tokenizeRE.finditer(text+"[%s]"%SENTINEL)
    try:
        smileys=env["smileys"]
    except KeyError:
        smileys=None
    if smileys is not None:
        env["smileycolorstr"] = ["yellow", "red", "blue"] [smileys]    # FIXED ORDER
    else:
        env["smileycolorstr"] = None
    parser=Parser(env,comment)
    return parser.parse(matches)


    
def test():
    """Print the HTML generated for two sample texts: inline styles and a list."""
    env={"urlprefix":"/frog/", "userid":"irmen", "filepath":"/home/irmen", "smileys":1 }
    # another sample that can be tried: "[b]b[/b]\n\n[i]i[/i]\n\n[tt]tt[/tt]"
    inline_sample="          gewoon [b]bold[/b] [i]italic[/i] [u]underline[/u] [tt]typewriter[/tt] [b]bold2[/b] gewoon       "
    list_sample="""[list]
[*] in een listitem gaat whitespace verloren:
[*] bla [b]bold[/b] [i]italic[/i]
[b]same line[/b]

[tt]diff line[/tt] bloe
[/list]"""
    print("-----")
    for sample in (inline_sample, list_sample):
        cleaned=contentcleanup(sample, env)
        print(content2html(cleaned, env))
        

# Run the demo conversion when this module is executed as a script.
if __name__=="__main__":
    test()

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.