
#include "stdafx.h"
#include "fastsearch.h"


// Default constructor.
//
// Leaves the object with no pattern, no buffer and no lookup table, and
// selects a case-sensitive search based on the C runtime.
fastsearch::fastsearch()
{
	// Both table pointers must be null before Init() runs, because Init()
	// releases whatever they point to.
	m_bmGs = NULL;    // Boyer Moore preprocessing
	m_kmpNext = NULL; // KMP preprocessing

	Init(NULL, NULL);

	SetAlgorithm(cruntime);
	SetCaseSensitive(TRUE);
}

// Destructor : releases the lookup tables built by PreSearch(), if any.
// The pattern and the buffer are not released here.
fastsearch::~fastsearch()
{
	RemoveBoyerMoore(); // good-suffix table
	RemoveKMP();        // KMP "next" table
}

//
// Accessors
//
// Stores the case-sensitivity flag.
// NOTE(review): none of the Search_* routines in this file reads the flag,
// so it currently has no visible effect on matching - confirm intent.
void fastsearch::SetCaseSensitive(BOOL bUseCase)
{
	this->m_bUseCase = bUseCase;
}
// Returns the flag last stored by SetCaseSensitive().
BOOL fastsearch::GetCaseSensitive()
{
	const BOOL bCurrent = m_bUseCase;
	return bCurrent;
}

// Selects the algorithm used by the next searches.
// No lookup table is built here; that is done by PreSearch().
void fastsearch::SetAlgorithm(enumSearchAlgorithm alg)
{
	this->m_nAlgorithm = alg;
}
// Returns the algorithm last stored by SetAlgorithm().
enumSearchAlgorithm fastsearch::GetAlgorithm()
{
	return this->m_nAlgorithm;
}


//
// Methods
//
// Init
//
// Registers the pattern to look for and the buffer to look into, places the
// search cursor at the start of the buffer and drops any lookup table built
// for a previous pattern. Only the pointers are stored : nothing is copied.
void fastsearch::Init(/*in*/LPTSTR pattern, /*in*/LPTSTR buffer)
{
	// tables computed for a previous pattern are no longer valid
	RemoveBoyerMoore();
	RemoveKMP();

	m_cpBuffer = buffer;
	m_cpPtr = m_cpBuffer; // cursor starts at the beginning of the buffer

	m_cpPattern = pattern;
}

// Count 
//
// count all occurences
// don't use this API in critical cases, it really performs a full search.
long fastsearch::Count() // count all occurences
{
	long nb = 0;

	PreSearch();

	while ( Next() )
		nb++;

	PostSearch();

	return nb;
}

// First
//
// Finds the first occurrence of the pattern in the buffer.
//
// Rewinds the search cursor to the start of the buffer, builds the lookup
// tables required by the current algorithm, then runs the search.
// Returns a pointer to the match inside the buffer, or NULL when there is no
// match or when no pattern / no buffer has been set.
LPTSTR fastsearch::First() // find first occurrence
{
	if (!m_cpPattern || !m_cpBuffer)
		return NULL;

	// always start from the beginning, whatever a previous search did
	m_cpPtr = m_cpBuffer;

	PreSearch();
	return Next();
}

// Next
//
// Finds the next occurrence of the pattern, starting at the search cursor,
// with the currently selected algorithm.
//
// On success the cursor is moved just past the match and a pointer to the
// match is returned. On failure NULL is returned and the cursor is not
// forwarded, so it never ends up pointing outside the buffer.
// Returns NULL as well when no pattern or no cursor is set, or when the
// algorithm is unknown.
LPTSTR fastsearch::Next() // find next occurrence
{
	if (!m_cpPattern || !m_cpPtr)
		return NULL;

	LPTSTR p = NULL;

	switch ( m_nAlgorithm )
	{
		case cruntime :   p = Search_CRuntime(); break;
		case naive :      p = Search_Naive();    break;
		case kmp :        p = Search_KMP();      break;
		case boyermoore : p = Search_BM();       break;
		default :         return NULL;
	}

	// only a real match may move the cursor : forwarding from NULL would
	// produce an invalid pointer
	if (p)
		Forward(p);

	return p;
}




// Helpers


// Forward
//
// Moves the search cursor just past a match so that the next search looks
// for the following, non-overlapping, occurrence.
//
// p : start of the match that was just found. When p is NULL (no match) or
//     when no pattern is set, the cursor is left untouched.
void fastsearch::Forward(LPTSTR p) // forward cursor to look for next pattern
{
	if (!p || !m_cpPattern)
		return;

	m_cpPtr = p + _tcslen(m_cpPattern);
}

// PreSearch
//
// Builds the lookup tables required by the currently selected algorithm :
//   - kmp        : m_kmpNext (m+1 entries, m being the pattern length)
//   - boyermoore : m_bmBc (bad-character shifts), m_bmGs (good-suffix shifts)
//                  and m_nBufferLen (length of the whole buffer)
//   - any other  : nothing to prepare
//
// Any table previously built for the selected algorithm is released first.
// When the table cannot be built (no pattern, no buffer or empty pattern for
// Boyer Moore, allocation failure) the table pointer is left NULL, which
// makes the matching search routine return NULL.
void fastsearch::PreSearch()
{
	if (m_nAlgorithm == kmp)
	{
		RemoveKMP();

		if (!m_cpPattern)
			return;

		const long m = (long) _tcslen(m_cpPattern);
		m_kmpNext = new long[m+1];
		if (!m_kmpNext)
			return;

		// see reference implementation here :
		//   http://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080

		long i = 0;
		long j = -1;

		m_kmpNext[0] = -1;
		while (i < m) {
			while (j > -1 && m_cpPattern[i] != m_cpPattern[j])
				j = m_kmpNext[j];
			i++;
			j++;
			// when i reaches m this compares against the terminating zero
			if (m_cpPattern[i] == m_cpPattern[j])
				m_kmpNext[i] = m_kmpNext[j];
			else
				m_kmpNext[i] = j;
		}

	}
	else if (m_nAlgorithm == boyermoore)
	{

		// see reference implementation here :
		//   http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140

		RemoveBoyerMoore();

		if (!m_cpPattern || !m_cpBuffer)
			return;

		const long m = (long) _tcslen(m_cpPattern);
		if (m == 0)
			return; // the suffix table below would be written at index -1

		m_bmGs = new long[m+1];
		if (!m_bmGs)
			return;

		long* suff = new long[m+1];
		if (!suff)
		{
			// never leave a half-built table behind for Search_BM()
			RemoveBoyerMoore();
			return;
		}

		// preBmBC implementation
		long i;

		// initialize the offset to move the pattern to anytime we'll bump onto a foreign character
		for (i = 0; i < 256; ++i) // TODO : add support to MBCS/Unicode
			m_bmBc[i] = m;
		// other offsets (default value)
		for (i = 0; i < m - 1; ++i)
		{
			// a TCHAR may be negative (signed char) or above 255 (wide char) :
			// such characters have no slot in the table and keep no entry
			const long c = (long) m_cpPattern[i];
			if (c >= 0 && c < 256)
				m_bmBc[c] = m - i - 1;
		}

		// suffixes implementation
		long f = 0; // always assigned before it is read; initialized for clarity
		long g, j;

		suff[m - 1] = m;
		g = m - 1;
		for (i = m - 2; i >= 0; --i) {
			if (i > g && suff[i + m - 1 - f] < i - g)
				suff[i] = suff[i + m - 1 - f];
			else {
				if (i < g)
					g = i;
				f = i;
				while (g >= 0 && m_cpPattern[g] == m_cpPattern[g + m - 1 - f])
					--g;
				suff[i] = f - g;
			}
		}

		// preBmGs implementation
		for (i = 0; i < m; ++i)
			m_bmGs[i] = m;
		j = 0;
		for (i = m - 1; i >= -1; --i)
			if (i == -1 || suff[i] == i + 1)
				for (; j < m - 1 - i; ++j)
					if (m_bmGs[j] == m)
						m_bmGs[j] = m - 1 - i;
		for (i = 0; i <= m - 2; ++i)
			m_bmGs[ m - 1 - suff[i] ] = m - 1 - i;

		delete [] suff;

		m_nBufferLen = (long) _tcslen(m_cpBuffer);

	} // end if (m_nAlgorithm == boyermoore)
}

// Search_CRuntime
//
// Delegates the search to the C runtime (_tcsstr), starting at the cursor.
// Returns whatever _tcsstr returns; the cursor is not moved here.
LPTSTR fastsearch::Search_CRuntime()
{
	LPTSTR hit = _tcsstr(m_cpPtr, m_cpPattern);
	return hit;
}

// Search_Naive
//
// Brute force search : tries to match the whole pattern at every position of
// the buffer, starting at the cursor.
//
// Returns the cursor position where the pattern matches, or NULL once the
// end of the buffer is reached. The cursor is left on the match, or on the
// terminating zero of the buffer when nothing was found.
LPTSTR fastsearch::Search_Naive()
{
	for ( ; *m_cpPtr != 0; ++m_cpPtr)
	{
		// number of leading characters of the pattern found at the cursor
		long matched = 0;
		while (m_cpPtr[matched] != 0 && m_cpPattern[matched] == m_cpPtr[matched])
			++matched;

		// the whole pattern has been consumed : this is a match
		if (m_cpPattern[matched] == 0)
			return m_cpPtr;
	}

	return NULL;
}


// Search_KMP
//
// Knuth-Morris-Pratt search, starting at the cursor. Requires the m_kmpNext
// table built by PreSearch().
//
// Returns a pointer to the start of the match, the cursor being left just
// past it, or NULL when the end of the buffer is reached (the cursor is then
// left on the terminating zero). Returns NULL as well when the table, the
// pattern or the cursor is missing.
LPTSTR fastsearch::Search_KMP()
{
	if (!m_kmpNext || !m_cpPattern || !m_cpPtr) return NULL;

	// see reference implementation here :
	//   http://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080

	const long m = (long) _tcslen(m_cpPattern);
	long i = 0; // number of pattern characters matched so far

	while ( *m_cpPtr ) {
		// on mismatch, fall back to the longest border that can still match
		while (i > -1 && *(m_cpPattern + i) != *m_cpPtr)
			i = m_kmpNext[i];

		i++;
		m_cpPtr++;

		// every call restarts with i = 0, so nothing has to be carried over
		// once a match is reported
		if (i >= m)
			return m_cpPtr - m;
	}

	return NULL;
}

// Search_BM
//
// Boyer Moore search, starting at the cursor. Requires the m_bmGs / m_bmBc
// tables and m_nBufferLen computed by PreSearch().
//
// Returns a pointer to the start of the match, or NULL when there is none.
// The cursor is not moved here. Returns NULL as well when the tables, the
// pattern, the buffer or the cursor is missing, or when the cursor does not
// lie inside the buffer measured by PreSearch().
LPTSTR fastsearch::Search_BM()
{
	if (!m_bmGs || !m_cpPattern || !m_cpBuffer || !m_cpPtr) return NULL;

	// see reference implementation here :
	//   http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140

	// m_nBufferLen is the length of the WHOLE buffer whereas the scan below is
	// relative to the cursor : refuse a cursor lying outside the buffer
	if (m_cpPtr < m_cpBuffer || m_cpPtr > m_cpBuffer + m_nBufferLen)
		return NULL;

	const long m = (long) _tcslen(m_cpPattern);
	// only what remains after the cursor may be scanned, otherwise the
	// comparison loop reads past the end of the buffer
	const long n = m_nBufferLen - (long) (m_cpPtr - m_cpBuffer);
	long j = 0;

	while ( j <= n - m ) {

		// compare right to left
		long i = m - 1;
		while (i >= 0 && m_cpPattern[i] == m_cpPtr[i + j])
			--i;

		if (i < 0)
			return m_cpPtr + j;

		// the good-suffix shift is always valid
		long shift = m_bmGs[i];

		// the bad-character table only covers character codes 0..255 : a TCHAR
		// may be negative (signed char) or larger (wide char), in which case
		// the good-suffix shift alone is used
		const long c = (long) m_cpPtr[i + j];
		if (c >= 0 && c < 256)
		{
			const long bcShift = m_bmBc[c] - m + 1 + i;
			if (bcShift > shift)
				shift = bcShift;
		}

		j += shift;
	}

	return NULL;
}

// PostSearch
//
// Counterpart of PreSearch(), called by Count() once the search loop is over.
// Currently does nothing : the lookup tables are released by Init() and by
// the destructor instead.
void fastsearch::PostSearch()
{
}

void fastsearch::RemoveKMP()
{
	delete [] m_kmpNext;
	m_kmpNext = NULL;
}

void fastsearch::RemoveBoyerMoore()
{
	delete [] m_bmGs;
	m_bmGs = NULL;
}
