##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
"""
Revision information:
$Id: HTMLClass.py,v 1.2 2002/06/10 19:17:57 chrism Exp $
"""
import string, re, urllib
from cgi import escape
from StructuredText.HTMLWithImages import HTMLWithImages
from TextBlockFormatter import format

################################################################
# Regexes and constants
################################################################

# Building blocks for the URL-matching regex below.  Each of the
# following strings is spliced into a character class ([...]).

# word characters
letters = r'\w'
# URL schemes that are recognised; this is a capturing group of its own
prefix = r'(http|https|ftp|mailto|file|about|wais|gopher)'
# (escaped) punctuation characters that may appear inside a URL
curses = r'\/\#\~\:\.\?\+\=\&\%\@\!\\\-\,'
# punctuation that may directly follow a URL without being part of it
punc = r'\.\?\-\,;'
# every character accepted inside a URL.
# NOTE: this name shadows the builtin any() within this module.
any = letters + curses + punc
# Group 1 is the whole URL: a scheme, a colon, then a non-greedy run of
# URL characters (the scheme alternation inside it is group 2).
# The (?= ...) in the second group is a positive lookahead assertion: the
# match ends where optional trailing punctuation is followed by a
# character that cannot be part of a URL, or at the end of the text, so
# sentence punctuation after a URL is left out of the match.
url = re.compile(r'\b(%s:[%s]+?)(?=[%s]*[^%s]|$)' % (prefix, any, punc, any))


################################################################
# classes and functions that deal with outputting a colorized
# stx document to HTML
################################################################

# Node names that HTMLClass.paragraph treats as containers: a paragraph
# holding at least one child with one of these names is rendered as a
# composite paragraph.
container_names = [
    'StructuredTextDescription',
    'StructuredTextSection',
    'StructuredTextTable',
    'StructuredTextExample',
    'StructuredTextBullet',
    'StructuredTextNumbered',
    'StructuredTextParagraph',
]

class HTMLClass(HTMLWithImages):
    """ Render a commentable structured-text document as HTML.

    Extends HTMLWithImages with handlers for 'Comment' and
    'CommentableExample' nodes, and renders every paragraph with a
    named anchor and (optionally) a "comment offer" link pointing at
    the commentForm of the base URL given to __call__. """

    def __init__(self):
        # Work on a per-instance copy so that registering the extra
        # node types does not modify the table on HTMLWithImages.
        self.element_types = HTMLWithImages.element_types.copy()
        self.element_types['Comment'] = 'comment'
        self.element_types['CommentableExample'] = 'commentable_example'

    def __call__(self, doc, level=1, base_url='', commentoffers=1,
                 header=1):
        """ Render doc and return the HTML as a single string.

        doc           -- the document node to render
        level         -- starting level; dispatch is entered at level-1
        base_url      -- prefix for the commentForm / commentImage links
        commentoffers -- true to emit comment-offer links in paragraphs
        header        -- stored on self.header (not read in this class)

        The settings are stored on the instance, so they remain in
        effect on this instance until the next call. """
        self.header = header
        self._url = base_url
        self._commentoffers = commentoffers
        r = []
        self.dispatch(doc, level-1, r.append)
        return ''.join(r)

    def _render_node(self, node, level, output):
        """ render one node with the handler registered for its name """
        handler = getattr(self, self.element_types[node.getNodeName()])
        handler(node, level, output)

    def _render_nodes(self, nodes, level, output):
        """ render each node of a sequence, in order """
        for node in nodes:
            self._render_node(node, level, output)

    def _offer_comment(self, lv, index, output):
        """ emit the comment-offer link for paragraph (lv, index),
        unless comment offers are switched off """
        if not self._commentoffers:
            return
        linktext = ('\n<div class="commentoffer">'
                    '<a href="%s/commentForm?level:int=%s&index:int=%s">'
                    '<img src="%s/commentImage" border="off" alt=" Comment ">'
                    '</a></div>\n')
        output(linktext % (self._url, lv, index, self._url))

    def comment(self, doc, level, output):
        """ render each child of a Comment node as its own
        <pre class="comment"> block """
        for c in doc.getChildNodes():
            output('\n<pre class="comment">\n')
            output(markupComment(c.getNodeValue()))
            output('\n</pre>\n')

    def commentable_example(self, doc, level, output):
        """ renderer is attached by CommentableDocument """
        renderer = doc.renderer
        normal = doc.normal
        childNodes = doc.getChildNodes()
        # the first child is the example, we make the assumption
        # that every other child is a comment.
        example = childNodes[0]
        output(renderer(example.getNodeValue(), normal))
        self._render_nodes(childNodes[1:], level, output)

    def example(self, doc, level, output):
        """ render an example as an HTML-escaped <pre> block, followed
        by whatever other children the node has """
        childNodes = doc.getChildNodes()
        # the first child is the example, we make the assumption
        # that every other child is a comment.
        example = childNodes[0]
        output('\n<pre>\n')
        output(escape(example.getNodeValue()))
        output('\n</pre>\n')
        self._render_nodes(childNodes[1:], level, output)

    def paragraph(self, doc, level, output, quote=urllib.quote_plus):
        """ commentable document paragraphs are located via their
        parenting level and the index into the list of paragraphs
        at that parenting level

        doc.id is expected to be a (level, index) pair; when it is
        missing or cannot be unpacked, (-1, -1) is used instead.
        quote is not used here; it is kept so the signature stays
        unchanged. """
        try:
            lv, index = doc.id
        except (AttributeError, TypeError, ValueError):
            # no id attribute, or an id that is not a 2-sequence
            lv, index = -1, -1
        output('<p><a name="%s-%s"> </a>\n' % (lv, index))
        childNodes = doc.getChildNodes()

        # only one text node in here, just render it and close the
        # paragraph after linking it, unless it's an sgml node or a
        # text node without linebreaks (those get no comment offer)
        if len(childNodes) == 1:
            nodeValue = doc.getNodeValue()
            if (childNodes[0].getNodeName() == 'StructuredTextSGML'
                or '\n' not in nodeValue):
                output(nodeValue)
                output('</p>')
                return
            output(nodeValue)
            self._offer_comment(lv, index, output)
            output('</p>\n')
            return

        # maybe we've just got a paragraph that has no real subs,
        # just some markup
        childTypes = [node.getNodeName() for node in childNodes]
        if not intersects(container_names, childTypes):
            self._render_nodes(childNodes, level, output)
            self._offer_comment(lv, index, output)
            output('</p>\n')
            return

        # otherwise we have a truly composite paragraph: the offer goes
        # right before the first container child, and only there.
        # (the original author notes that this causes inappropriate
        # double-offers, but that leaving it out prevents full coverage)
        offered = 0
        for node in childNodes:
            if not offered and node.getNodeName() in container_names:
                self._offer_comment(lv, index, output)
                offered = 1
            self._render_node(node, level, output)
        output('</p>\n')

# Module-level renderer instance.  Calling it stores the per-call settings
# (base url, comment offers, header) on this one shared object.
HTML = HTMLClass()

################################################################
# Utility functions
################################################################

def indentation_plus(t, plus=0, spaces_expr=re.compile(r'^(\s*)').match):
    """ return the length of the leading whitespace of t, increased
    by plus.

    we would just use the indention utility from ST, but it's
    not always there under every ST version """
    leading = spaces_expr(t)
    if not leading:
        # matcher found nothing: only the extra amount remains
        return plus
    return leading.end() - leading.start() + plus

def markupComment(comment):
    """ return comment as HTML-escaped, re-wrapped text in which
    every URL has been turned into a link.

    a comment may be broken into paragraphs using the "vertical tab"
    (chr(11)) notation (created, per the original comment, by
    insertComment); each such break becomes a line break. """
    # escape paragraph by paragraph and collect the resulting lines; a
    # comment without vertical tabs is simply a single paragraph.
    commentlines = []
    for paragraph in comment.split(chr(11)):
        commentlines.extend(escape(paragraph).split('\n'))
    comment = '\n'.join(commentlines)
    # run the comment through the formatter in case any lines are
    # silly long ;-)
    comment = format(comment, max_width=110, indent=0)
    # NOTE(review): escaping happens before URL linking, and both '&'
    # and ';' are in the URL character set, so an entity directly
    # following a URL (e.g. '&gt;') can presumably end up inside the
    # link -- verify before relying on it.
    return url.sub(r'<a href="\1">\1</a>', comment)

def intersects(l1, l2):
    """ return 1 as soon as an item of l1 is found in l2; return
    None when the two have no item in common """
    for candidate in l1:
        if candidate not in l2:
            continue
        return 1
    return None

