// HtmlParser.cpp: implementation of the CHtmlParser class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "HTML Rebuilder.h"
#include "HtmlParser.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Creates a parser with no document loaded: the read position starts out
// NULL and is only given a value by a successful Open().
CHtmlParser::CHtmlParser()
    : m_Position(NULL)
{
}

// Tears the parser down by delegating to Close().
CHtmlParser::~CHtmlParser()
{
    this->Close();
}

// Loads the whole file `Filename` into m_Source and places the read position
// on its first character.
//
// Any previously opened document is closed first.
//
// Returns true on success. Returns false when the file cannot be opened, its
// size cannot be queried, or it cannot be read completely; in those cases
// m_Source is left empty and m_Position stays NULL.
//
// NOTE(review): the file's bytes are copied verbatim into the TCHAR buffer,
// which presumably assumes an ANSI/MBCS build - confirm before building with
// _UNICODE.
bool CHtmlParser::Open(LPCTSTR Filename)
{
    Close();

    HANDLE FileHandle = ::CreateFile(Filename,
                                     GENERIC_READ,
                                     FILE_SHARE_READ,
                                     NULL,
                                     OPEN_EXISTING,
                                     FILE_ATTRIBUTE_NORMAL,
                                     NULL);
    if (FileHandle == INVALID_HANDLE_VALUE)
    {
        return false;
    }

    // With a NULL high-size pointer GetFileSize() reports failure as
    // 0xFFFFFFFF (INVALID_FILE_SIZE); a file that large could not be loaded
    // into a single string anyway.
    const DWORD InvalidSize = 0xFFFFFFFF;
    const DWORD FileSize = ::GetFileSize(FileHandle, NULL);
    if (FileSize == InvalidSize)
    {
        ::CloseHandle(FileHandle);
        return false;
    }

    DWORD Readed = 0;
    const BOOL ReadOk = ::ReadFile(FileHandle, m_Source.GetBuffer(FileSize), FileSize, &Readed, NULL);
    const bool Complete = ReadOk && (Readed == FileSize);

    // On a failed or short read drop the partially filled buffer instead of
    // keeping uninitialised characters in m_Source.
    m_Source.ReleaseBuffer(Complete ? FileSize : 0);
    ::CloseHandle(FileHandle);
    if (!Complete)
    {
        return false;
    }

    m_Position = m_Source;
    m_SourceFilename = Filename;
    return true;
}

bool CHtmlParser::Parse()
{
LPCTSTR Contents = m_Position;
for (;Search (_T('<'));)
    {
    int ContentsLength = m_Position - Contents;
    if (ContentsLength)
        {
        OnContents(Contents, ContentsLength);
        };
    m_Position++;
    SkipWhitespace();
    if (*m_Position == _T('!'))
        {
        LPCTSTR SpecialTag = m_Position;
        m_Position++;
        if (_tcsncmp (m_Position, _T("--"), 2) == 0)
            {
            // comment
            m_Position += 2;
            if (!Search (_T("-->")))
                {
                return (false);
                };
            m_Position += 3;
            }
        else
            {
            if (!Search (_T('>')))
                {
                return (false);
                };
            m_Position++;
            };
        OnSpecialTag (SpecialTag, m_Position - SpecialTag - 1);
        }
    else
        {
        // tag
        if (*m_Position == _T('/'))
            {
            // closing tag
            m_Position++;
            LPCTSTR Identifier;
            int IdentifierLength;
            if (!ParseIdentifier(&Identifier, IdentifierLength))
                { 
                return (false);
                };
            SkipWhitespace();
            if (*m_Position != _T('>'))
                {
                return (false);
                };
            m_Position++;
            OnClosingTag (Identifier, IdentifierLength);
            }
        else
            {
            LPCTSTR Identifier;
            int IdentifierLength;
            bool LookForEndingTag = false;
            if (!ParseIdentifier(&Identifier, IdentifierLength))
                {
                return (false);
                };
            OnTag (Identifier, IdentifierLength);
            /*
            CString Trace (Identifier, IdentifierLength);
            TRACE1 ("%s\n", Trace);
            */
            bool Error;
            LPCTSTR AttrName, AttrValue;
            int AttrNameLength, AttrValueLength;
            for (; SkipWhitespace(), ParseAttribute (&AttrName, AttrNameLength, &AttrValue, AttrValueLength, Error);)
                {
                OnAttribute(AttrName, AttrNameLength, AttrValue, AttrValueLength);
                };
            if (Error)
                {
                return (false);
                };
            bool ClosedTag = false;
            if (*m_Position == _T('/'))
                {
                m_Position++;
                ClosedTag = true;
                }
            else
                {
                LookForEndingTag = (_tcsnicmp (Identifier, _T("script"), IdentifierLength) == 0);
                };
            if (*m_Position != _T('>'))
                {
                return (false);
                };
            OnTagClose(Identifier, IdentifierLength, ClosedTag);
            m_Position++;
            if (LookForEndingTag)
                {
                Contents = m_Position;
                if (!SearchForEndingTag (Identifier, IdentifierLength))
                    {
                    return (false);
                    };
                ContentsLength = m_Position - Contents;
                if (ContentsLength)
                    {
                    OnContents(Contents, ContentsLength);
                    };
                OnClosingTag(Identifier, IdentifierLength);
                };
            };
        };
    Contents = m_Position;
    };
return (true);
}

// Moves the read position forward to the next occurrence of Char.
// Returns true when one exists; otherwise the read position becomes NULL
// and false is returned.
bool CHtmlParser::Search(TCHAR Char)
{
    LPCTSTR Found = _tcschr (m_Position, Char);
    m_Position = Found;
    return (Found != NULL);
}

// Moves the read position forward to the next occurrence of String.
// Returns true when one exists; otherwise the read position becomes NULL
// and false is returned.
bool CHtmlParser::Search(LPCTSTR String)
{
    LPCTSTR Found = _tcsstr (m_Position, String);
    m_Position = Found;
    return (Found != NULL);
}

// Advances the read position past any run of whitespace characters; the
// position is unchanged when it is not on whitespace.
void CHtmlParser::SkipWhitespace()
{
    while (_istspace (*m_Position))
    {
        m_Position++;
    }
}

bool CHtmlParser::ParseIdentifier(LPCTSTR *Identifier, int &Count)
{
*Identifier = m_Position;
if (!__iscsymf(*m_Position) && (*m_Position != _T('?')))
    {
    Count = 0;
    return (false);
    };
for (m_Position++; __iscsym (*m_Position) || (*m_Position == _T('-')) || (*m_Position == _T(':') || (*m_Position == _T('?'))); m_Position++);
/*
m_N++;
if (m_N == 6485)
    {
    int x = 9;
    };
*/
Count = m_Position - *Identifier;
//TRACE3 ("%li %li %s\n", m_N, int (m_Position - LPCTSTR (m_Source)), Identifier);
return (true);
}

// Reads one attribute ("name", "name=value" or "name=\"value\"") at the read
// position.
//
// AttributeName / AttributeNameLength    receive the attribute name.
// AttributeValue / AttributeValueLength  receive the value without its
//     double quotes; NULL and 0 when the attribute has no '='.
// Error  is set to true when the tag is malformed (a quoted value without a
//     closing quote, or the document ends inside an unquoted value) and to
//     false in every other case.
//
// Returns true when an attribute was read. Returns false with Error == false
// when there is no attribute name at the read position (the normal end of an
// attribute list), and false with Error == true on malformed input.
//
// All returned pointers point into the source buffer and are delimited by
// their length, not by a NUL terminator.
bool CHtmlParser::ParseAttribute(LPCTSTR *AttributeName, int &AttributeNameLength, LPCTSTR *AttributeValue, int &AttributeValueLength, bool &Error)
{
    Error = false;
    if (!ParseIdentifier(AttributeName, AttributeNameLength))
    {
        return false;
    }

    SkipWhitespace();
    if (*m_Position != _T('='))
    {
        // Attribute without a value.
        *AttributeValue = NULL;
        AttributeValueLength = 0;
        return true;
    }
    m_Position++;
    SkipWhitespace();

    if (*m_Position == _T('"'))
    {
        // Quoted value: everything up to the next double quote.
        m_Position++;
        *AttributeValue = m_Position;
        if (!Search(_T('"')))
        {
            Error = true;
            return false;
        }
        AttributeValueLength = static_cast<int>(m_Position - *AttributeValue);
        m_Position++;
        return true;
    }

    // Unquoted value: runs up to whitespace, '>' or "/>".
    *AttributeValue = m_Position;
    if (*m_Position != _T('>'))     // "name=>" is an empty value; keep the '>' for the caller
    {
        for (;;)
        {
            // The document ended inside the tag. Without this test the scan
            // would walk past the terminating NUL of the buffer.
            if (*m_Position == _T('\0'))
            {
                Error = true;
                return false;
            }
            m_Position++;
            if (_istspace(*m_Position)
                || (*m_Position == _T('>'))
                || ((*m_Position == _T('/')) && (*(m_Position + 1) == _T('>'))))
            {
                break;
            }
        }
    }
    AttributeValueLength = static_cast<int>(m_Position - *AttributeValue);
    return true;
}

// Drops the loaded document: resets the read position and empties both the
// source text and the stored file name.
//
// The reset is unconditional. The read position is also NULL after a
// completed Parse() or a failed read, and in those states the buffer must
// still be released.
void CHtmlParser::Close()
{
    // Clear the pointer first; it refers to the buffer owned by m_Source.
    m_Position = NULL;
    m_Source = LPCTSTR (NULL);
    m_SourceFilename = m_Source;
}

// Skips forward to just behind the closing tag "</TagName>" of a raw-text
// element. The name is compared case-insensitively, and whitespace is
// tolerated after "</" and before '>'.
//
// TagName / TagNameLength  name of the element whose end is searched; the
//                          name is delimited by the length, not by NUL.
//
// Always returns true. When the document contains no matching closing tag,
// the read position is placed at the end of the document, so the caller
// sees the remainder of the text as the element's content instead of a NULL
// position.
bool CHtmlParser::SearchForEndingTag(LPCTSTR TagName, int TagNameLength)
{
    for (;;)
    {
        // Search() nulls m_Position on failure, so remember where this
        // scan started in order to be able to locate the end of the buffer.
        LPCTSTR ScanStart = m_Position;
        if (!Search(_T("</")))
        {
            m_Position = ScanStart + _tcslen(ScanStart);
            return true;
        }
        m_Position += 2;
        SkipWhitespace();

        LPCTSTR CurrTagName = NULL;
        int CurrTagLength = 0;
        const bool SameName = ParseIdentifier(&CurrTagName, CurrTagLength)
                              && (CurrTagLength == TagNameLength)
                              && (_tcsnicmp(CurrTagName, TagName, TagNameLength) == 0);
        if (SameName)
        {
            SkipWhitespace();
            if (*m_Position == _T('>'))
            {
                m_Position++;
                return true;
            }
        }
        // Some other closing tag (or a stray "</"): keep scanning from here.
    }
}

// Hook invoked by Parse() after a closing tag "</name>" has been consumed,
// and also after the content of a script element has been reported.
// TagName points into the source buffer and is TagNameLength characters
// long (not NUL-terminated). This implementation does nothing; presumably
// it is meant to be overridden - the class declaration is not visible here.
void CHtmlParser::OnClosingTag(LPCTSTR /*TagName*/, int /*TagNameLength*/)
{
}

// Hook invoked by Parse() as soon as the name of an opening tag has been
// read, before any of its attributes are processed.
// TagName points into the source buffer and is TagNameLength characters
// long (not NUL-terminated). This implementation does nothing; presumably
// it is meant to be overridden - the class declaration is not visible here.
void CHtmlParser::OnTag(LPCTSTR /*TagName*/, int /*TagNameLength*/)
{
}

// Hook invoked by Parse() once for every attribute of an opening tag.
// Name and Value point into the source buffer and are delimited by
// NameLength / ValueLength (not NUL-terminated). For an attribute written
// without '=', Value is NULL and ValueLength is 0. This implementation does
// nothing; presumably it is meant to be overridden - the class declaration
// is not visible here.
void CHtmlParser::OnAttribute(LPCTSTR /*Name*/, int /*NameLength*/, LPCTSTR /*Value*/, int /*ValueLength*/)
{
}

// Hook invoked by Parse() when the '>' that ends an opening tag is reached,
// after all of the tag's attributes have been reported.
// TagName points into the source buffer and is TagNameLength characters
// long (not NUL-terminated). Ends is true when the tag was written in the
// self-closing form, i.e. with a '/' directly before the '>'.
// This implementation does nothing; presumably it is meant to be
// overridden - the class declaration is not visible here.
void CHtmlParser::OnTagClose(LPCTSTR /*TagName*/, int /*TagNameLength*/, bool /*Ends*/)
{
}

// Hook invoked by Parse() for a non-empty run of text found between two
// tags, and for the body of a script element.
// Contents points into the source buffer and is ContentsLength characters
// long (not NUL-terminated). This implementation does nothing; presumably
// it is meant to be overridden - the class declaration is not visible here.
void CHtmlParser::OnContents(LPCTSTR /*Contents*/, int /*ContentsLength*/)
{
}

// Hook invoked by Parse() for every "<!...>" construct, comments included.
// Tag points at the '!' inside the source buffer; TagLength covers the text
// up to, but not including, the terminating '>' (not NUL-terminated).
// This implementation does nothing; presumably it is meant to be
// overridden - the class declaration is not visible here.
void CHtmlParser::OnSpecialTag(LPCTSTR /*Tag*/, int /*TagLength*/)
{
}
