# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; version 2 of the
# License.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA

from HTMLParser import HTMLParser, HTMLParseError

class HTMLLookup(HTMLParser):
    def __init__(self):
        self.reset();

    def reset(self):
        HTMLParser.reset(self)

        # Used as a tag stack as teh document is being parsed
        self.stack = []

        # Stores tupes of (tag, condition) to be searched,
        # Condition a list of tuples (attribute, value)
        self.path = []

        # Is a record of the depth at which each tag in the path
        # was found, used for tag matching when finding closing tags
        self.found_path_depth = []

        # Stores the next item in the path to be searched
        self.path_index = 0

        # Maintains a count of the items in the stack
        self.stack_index = 0

        # Flag to publish found data, publishing occurs on all data items
        # found after the path has been found
        self.found_target = False

        # Flag to indicate to quit once the processing has been completed
        self.quit_on_done = False

    def feed(self, data):
        try:
            quit_on_done_backup = self.quit_on_done
            path_backup = self.path
            self.reset()
            self.path = path_backup
            self.quit_on_done = quit_on_done_backup
            HTMLParser.feed(self, data)
        except HTMLParseError, msg:
            if not self.quit_on_done or not "DONE PROCESSING" in msg.msg:
                raise HTMLParseError(msg.msg, self.getpos())
            

    def add_path_node(self, tag):
        self.path.append((tag,[]))

    def add_path_conditioned_node(self, tag, attrs):
        self.path.append((tag, attrs))

    # Matches a tag with the following tag to be searched on
    # the path attribute
    def match_tag(self, tag, attrs):
        ret_val = True;

        # Gets the next path tuple
        next_tuple = self.path[self.path_index]

        # Splits the tuple in Tag and Attributes
        next_tag = next_tuple[0]
        next_attrs = next_tuple[1]

        # Compares the next and found tags
        if next_tag == tag:

            # Ensures all the Attribute tuples in the next path tag
            # exists on the found tag attributes
            for i in range(len(next_attrs)):
                if attrs.count(next_attrs[i]) == 0:
                    ret_val = False

        else:
            ret_val = False

        return ret_val
        
    def handle_starttag(self, tag, attrs):
        # The path index controls the number of items in
        # the search path that have been found
        if self.path_index < len(self.path):
            if self.match_tag(tag, attrs):
                self.found_path_depth.append(self.stack_index)
                self.path_index = self.path_index + 1

            
            if self.path_index == len(self.path):
                self.found_target = True
                self.handle_path_entry(tag, attrs)

        # Any tag is stacked and changes the stack index 
        self.stack.append(tag)
        self.stack_index = self.stack_index + 1

    def handle_endtag(self, tag):
        self.stack_index = self.stack_index - 1

        # Ensures the closing tag matches the one in the stack
        if self.stack[self.stack_index] == tag:
            self.stack.pop()

            # If any path item has been found
            if self.path_index > 0:

                # Verifies if the depth of the closing tag matches with the depth of
                # the last path found item
                if self.stack_index == self.found_path_depth[self.path_index - 1]:

                    # Verifies the closing tag is the same as the last found path item
                    if self.path[self.path_index -1][0] == tag:

                        # Positions the found cursors an item back
                        self.path_index = self.path_index - 1
                        self.found_path_depth.pop()

                        # If the found path is incomplete, stops publishing
                        if self.found_target and self.path_index < len(self.path):
                            self.found_target = False
                            self.handle_path_exit(tag)
                            if self.quit_on_done:
                                self.error("DONE PROCESSING")
                    else:
                        print "Error: end tag not matching tag in search path!"
        else:
            print "Error: end tag not matching start tag : ", tag

    def handle_data(self, data):
        if self.found_target:
            self.handle_found_data(data)

    # Function to handle data found after the complete path has been found
    # An inheriting class should do something with this
    def handle_found_data(self, data):
        pass

    # Function to handle the exact moment where the path being searched has been found
    # An inheriting class should do something with this
    def handle_path_entry(self, tag, attrs):
        pass

    def handle_path_exit(self, tag):
        pass