// This program demonstrates the use of a traditional DAWG as applied to in-order anagramming, where the data structure really shines.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX 15
#define MAX_INPUT 120
#define BIG_IT_UP -32
#define DAWG_DATA "Dawg_For_Lexicon.dat"
#define WORD_LIST_OUTPUT "Dawg_Word_List.txt"

// These values define the format of the "Dawg" node encoding.
#define LETTER_BIT_SHIFT 25
#define LETTER_BIT_MASK 1040187392
#define CHILD_INDEX_BIT_MASK 33554431
#define END_OF_WORD_BIT_MASK 2147483648
#define END_OF_LIST_BIT_MASK 1073741824

// Define the boolean type as an enumeration.
typedef enum {FALSE = 0, TRUE = 1} Bool;
typedef Bool* BoolPtr;

// When reading strings from a file, the new-line character is appended, and this macro will remove it before processing.
#define CUT_OFF_NEW_LINE(string) (string[strlen(string) - 1] = '\0')

// For speed, define these two simple functions as macros.  They modify the "LettersToWorkWith" string in the recursive anagrammer.
#define REMOVE_CHAR_FROM_STRING(thestring, theposition, shiftsize) ( memmove(thestring + theposition, thestring + theposition + 1, shiftsize) )
#define INSERT_CHAR_IN_STRING(thestring, theposition, thechar, shiftsize) ( (memmove(thestring + theposition + 1, thestring + theposition, shiftsize)), (thestring[theposition] = thechar) )

// This function converts any lower case letters in the string "RawWord", into capitals, so that the whole string is made of capital letters.
void MakeMeAllCapital(char *RawWord){
	unsigned int X;
	for ( X = 0; X < strlen(RawWord); X++ ){
		if ( RawWord[X] >= 'a' && RawWord[X] <= 'z' ) RawWord[X] = RawWord[X] + BIG_IT_UP;
	}
}

// This function removes all non-letter chars from "ThisString".
void RemoveIllegalChars(char *ThisString){
	unsigned int X;
	for ( X = 0; X < strlen(ThisString); X++ ) {
		if ( !(ThisString[X] >= 'A' && ThisString[X] <= 'Z') && !(ThisString[X] >= 'a' && ThisString[X] <= 'z') ) {
			memmove(ThisString + X, ThisString + X + 1, strlen(ThisString) - X);
			X -= 1;
		}
	}
}

// This is a simple Bubble Sort.  There is no need for a an optimal algorithm here, because the user input of this program will be very short.
void Alphabetize(char* Word){
	int X;
	int Y;
	char WorkingChar;
	int WordSize = strlen(Word);
	/* This nested "for" loop structure ensures that the highest letter filters to the back of the string each time that we	increment X in the outer loop. */
	for( X = 1; X < WordSize; X++ ) {
		for( Y = 0; Y <= (WordSize - X - 1); Y++ ) {
			if (Word[Y] > Word[Y + 1]){
				WorkingChar = Word[Y + 1];
				Word[Y + 1] = Word[Y];
				Word[Y] = WorkingChar;
			}
		}
	}
}

// Define the "Dawg" functionality as macros for speed.
#define DAWG_LETTER(thearray, theindex) (((thearray[theindex]&LETTER_BIT_MASK)>>LETTER_BIT_SHIFT) + 'A')
#define DAWG_END_OF_WORD(thearray, theindex) ((thearray[theindex]&END_OF_WORD_BIT_MASK)? TRUE: FALSE)
#define DAWG_NEXT(thearray, theindex) ((thearray[theindex]&END_OF_LIST_BIT_MASK)? 0: (theindex + 1))
#define DAWG_CHILD(thearray, theindex) (thearray[theindex]&CHILD_INDEX_BIT_MASK)

// A recursive depth first traversal of "TheDawg" lexicon to produce a readable wordlist in "TheStream".
void DawgTraverseLexiconRecurse(unsigned int *TheDawg, int CurrentIndex, int FillThisPosition, char *WorkingString, int *TheCount, FILE *TheStream){
	int PassOffIndex;
	WorkingString[FillThisPosition] = DAWG_LETTER(TheDawg, CurrentIndex);
	if ( DAWG_END_OF_WORD(TheDawg, CurrentIndex) ) {
		*TheCount += 1;
		WorkingString[FillThisPosition + 1] = '\0';
		// Include the Windows Carriage Return char.
		fprintf(TheStream, "|%6d|-|%-15s|\r\n", *TheCount, WorkingString);
	}
	if ( PassOffIndex = DAWG_CHILD(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition + 1, WorkingString, TheCount, TheStream);
	if ( PassOffIndex = DAWG_NEXT(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition, WorkingString, TheCount, TheStream);
}

// Move through the entire "ThisDawg" lexicon, and print the words into "ThisStream".
void DawgTraverseLexicon(unsigned int *ThisDawg, FILE *ThisStream){
	char *BufferWord = (char*)malloc((MAX + 1)*sizeof(char));
	int *WordCounter = (int*)malloc(sizeof(int));
	*WordCounter = 0;
	// Include the Windows Carriage Return char.
	fprintf(ThisStream, "This is the lexicon contained in the file |%s|.\r\n\r\n", DAWG_DATA);
	DawgTraverseLexiconRecurse(ThisDawg, 1, 0, BufferWord, WordCounter, ThisStream);
	free(BufferWord);
	free(WordCounter);
}

// This function is the core component of this program.  It requires that "UnusedChars" be in alphabetical order because the tradition Dawg is a list based structure.
void DawgAnagrammerRecurse(unsigned int *DawgOfWar, int CurrentIndex, char *ToyWithMe, int FillThisPosition, char *UnusedChars, int SizeOfBank, int *ForTheCounter){
	int X;
	char PreviousChar = '\0';
	char CurrentChar;
	int TempIndex = DAWG_CHILD(DawgOfWar, CurrentIndex);
	
	ToyWithMe[FillThisPosition] = DAWG_LETTER(DawgOfWar, CurrentIndex);
	//ToyWithMe[FillThisPosition + 1] = '\0';
	//printf("UnusedChars|%s|\n", UnusedChars);
	if ( DAWG_END_OF_WORD(DawgOfWar, CurrentIndex) ) {
		*ForTheCounter += 1;
		ToyWithMe[FillThisPosition + 1] = '\0';
		printf("|%4d| - |%-15s|\n", *ForTheCounter, ToyWithMe);
	}
	if ( (SizeOfBank > 0) && (TempIndex != 0) ) {
		for ( X = 0; X < SizeOfBank; X++ ) {
			CurrentChar = UnusedChars[X];
			//printf("Looking For |%c|\n", CurrentChar);
			if ( CurrentChar == PreviousChar ) continue;
			do {
				//printf("Compare -|%c| with ^|%c|-|%d|\n", CurrentChar, DAWG_LETTER(DawgOfWar, TempIndex), TempIndex);
				if ( CurrentChar == DAWG_LETTER(DawgOfWar, TempIndex) ) {
					//printf("Bingo\n");
					REMOVE_CHAR_FROM_STRING(UnusedChars, X, SizeOfBank - X);
					DawgAnagrammerRecurse(DawgOfWar, TempIndex, ToyWithMe, FillThisPosition + 1, UnusedChars, SizeOfBank - 1, ForTheCounter);
					//printf("Back|%d|\n", FillThisPosition);
					INSERT_CHAR_IN_STRING(UnusedChars, X, CurrentChar, SizeOfBank - X);
					TempIndex = DAWG_NEXT(DawgOfWar, TempIndex);
					break;
				}
				else if ( CurrentChar < DAWG_LETTER(DawgOfWar, TempIndex) ) break;
			} while ( TempIndex = DAWG_NEXT(DawgOfWar, TempIndex) );
			if ( TempIndex == 0 ) break;
			PreviousChar = CurrentChar;
		}
	}
}

// This function uses "MasterDawg" to determine the all of the words that can be made from the letters in "CharBank".
// The return value is the total number of words found.
int DawgAnagrammer(unsigned int *MasterDawg, char * CharBank){
	int X;
	int Result;
	int BankSize = strlen(CharBank);
	int *ForTheCount = (int*)malloc(sizeof(int));
	char *TheWordSoFar = (char*)malloc((MAX + 1)*sizeof(char));
	char *LettersToWorkWith = (char*)malloc((MAX_INPUT)*sizeof(char));
	char PreviousChar = '\0';
	char CurrentChar;
	int NumberOfLetters;
	strcpy(LettersToWorkWith, CharBank);
	NumberOfLetters = strlen(LettersToWorkWith);
	
	*ForTheCount = 0;
	for ( X = 0; X < BankSize; X++ ) {
		CurrentChar = CharBank[X];
		// Move to the next letter if we have already processed the "CurrentChar".
		if ( CurrentChar == PreviousChar ) continue;
		// We can assume that every letter in the lexicon exists on the first row, so don't bother looking for them.  Just remove the one we are using and plug away.
		REMOVE_CHAR_FROM_STRING(LettersToWorkWith, X, NumberOfLetters - X);
		DawgAnagrammerRecurse(MasterDawg, CurrentChar - '@', TheWordSoFar, 0, LettersToWorkWith, NumberOfLetters - 1, ForTheCount);
		INSERT_CHAR_IN_STRING(LettersToWorkWith, X, CurrentChar, NumberOfLetters - X);
		PreviousChar = CurrentChar;
	}
	Result = *ForTheCount;
	free(ForTheCount);
	free(TheWordSoFar);
	free(LettersToWorkWith);
	return Result;
}

int main() {
	int NumberOfNodes;
	unsigned int *TheDawgArray;
	FILE *Lexicon;
	FILE *WordList;
	char *DecisionInput;
	Bool FetchData = TRUE;
	char FirstChar;
	int InputSize;
	
	DecisionInput = (char*)malloc(MAX_INPUT*sizeof(char));
	Lexicon = fopen(DAWG_DATA, "rb");
	fread(&NumberOfNodes, sizeof(int), 1, Lexicon);
	printf("The lexicon DAWG contains |%d| nodes.\n", NumberOfNodes);
	TheDawgArray = (unsigned int*)malloc(NumberOfNodes*sizeof(unsigned int));
	fread(TheDawgArray, sizeof(unsigned int), NumberOfNodes, Lexicon);
	fclose(Lexicon);
	printf("\nLexicon data file TWL06 has been read into memory.\n\n");
	
	// This program relies on a compressed lexicon data file, so allow the user to see a readable word list in an output file.
	while ( FetchData == TRUE ) {
		printf("\nWould you like to print the Dawg word list into a file?(Y/N):  ");
		fgets(DecisionInput, MAX_INPUT, stdin);
		FirstChar = DecisionInput[0];
		if ( FirstChar == 'Y' || FirstChar == 'y' ) {
			WordList = fopen(WORD_LIST_OUTPUT, "w");
			DawgTraverseLexicon(TheDawgArray, WordList);
			fclose(WordList);
			FetchData = FALSE;
		}
		else if ( FirstChar == 'N' || FirstChar == 'n' ) FetchData = FALSE;
	}
	FetchData = TRUE;
	
	// Now the user can enter strings of letters for anagramming, to see the words that can be made from them.
	while ( FetchData == TRUE ) {
		printf("\nEnter the string of letters that you want to anagram(2 or more letters):  ");
		fgets(DecisionInput, MAX_INPUT, stdin);
		CUT_OFF_NEW_LINE(DecisionInput);
		RemoveIllegalChars(DecisionInput);
		MakeMeAllCapital(DecisionInput);
		Alphabetize(DecisionInput);
		InputSize = strlen(DecisionInput);
		if ( InputSize >= 2 ) {
			printf("\nThis is the set of letters that you just input |%s|.\n\n", DecisionInput);
			printf("\n|%d| Words were found in the lexicon Dawg.\n", DawgAnagrammer(TheDawgArray, DecisionInput));
		}
		else FetchData = FALSE;
	}
	printf("\nThank you for playing the GoogleDawgAnagrammer.  GAME OVER.\n\n");
	return 0;
}

