﻿
// Tanzil Text Tools
// Copyright (C) 2008 Hamid Zarrabi-Zadeh
// http://tanzil.info


//---------------------- Text Tools -----------------------


// Constructor: builds the rule tables used by the search-pattern helpers and
// expands the symbolic names inside the global UGroups table.
//
// Every table is a list of [from, to] pairs; both sides may contain $NAME
// symbols, which pregReplace()/regTrans() resolve through UGroups/UChars
// (both defined outside this file section).
function TextTools()
{
	// matching rules: widen a typed letter into the set of letters it should match.
	// "YY" is a temporary placeholder for $ALEF_MAKSURA that is expanded by the
	// later "YY" rule (presumably so the $ALEF/$YEH rules in between leave it alone).
	this.matchingRules = [
		["$HAMZA_SHAPE", "$HAMZA_SHAPE"],
		["$ALEF_MAKSURA", "YY"],
		["$ALEF", "[$ALEF$ALEF_MAKSURA$ALEF_WITH_MADDA_ABOVE$ALEF_WITH_HAMZA_ABOVE$ALEF_WITH_HAMZA_BELOW$ALEF_WASLA]"],
		["[$TEH$MARBUTA]", "[$TEH$MARBUTA]"],
		["$HEH", "[$HEH$MARBUTA]"],
		["$WAW", "[$WAW$WAW_WITH_HAMZA_ABOVE$SMALL_WAW]"],
		["$YEH", "[$YEH$ALEF_MAKSURA$YEH_WITH_HAMZA$SMALL_YEH]"],
		["YY", "[$ALEF_MAKSURA$YEH$ALEF]"],
		[" ", "$SPACE"]
	];

	// wildcard normalisation: "." -> P, "*" -> S, "?" / "؟" -> Q, then any run of
	// Q/S containing an S collapses to a single S.
	// NOTE(review): inside a string literal "\s" is just "s", so the last rule
	// strips leading "s" letters (plus leading Q/S), not leading whitespace.
	// "\\s" was probably intended, but stripping a leading space would also drop
	// the space handleSpaces() leaves in place of an opening quote, so the value
	// is left unchanged here -- confirm the intent before changing it.
	this.wildcardRegs = [
		["\\.", "P"],
		["\\*", "S"],
		["[?؟]", "Q"],
		["[QS]*S[QS]*", "S"],
		["^\s*[QS]*", ""]
	];

	// wildcard expansion: turn the S/Q/P placeholders into regular expressions
	this.wildcards = [
		["S", "$LETTER_HARAKA*"],
		["Q", "$LETTER?"],
		["P", "$LETTER"]
	];

	// pre-processing: map Farsi/other yeh and kaf variants to the Arabic letters
	this.preProcess = [
		["[$FARSI_YEH$YEH_BARREE]", "$YEH"],
		["[$FARSI_KEHEH$SWASH_KAF]", "$KAF"]
	];

	// init: resolve the $NAME symbols inside every character group, in place.
	// Only own properties are translated; an inherited enumerable property is
	// not a group and would make regTrans() fail on a non-string value.
	for (var name in UGroups)
	{
		if (!Object.prototype.hasOwnProperty.call(UGroups, name)) continue;
		UGroups[name] = this.regTrans(UGroups[name]);
	}
}


//---------------------- General Functions -----------------------


// Translate a symbolic regular expression.
// Every "$NAME" token (NAME made of uppercase letters and underscores) is
// replaced by UGroups[NAME] if that is truthy, otherwise by UChars[NAME],
// otherwise by an empty string. Anything else (e.g. "$1") is left untouched.
TextTools.prototype.regTrans = function(str)
{
	var resolve = function(token, name)
	{
		if (UGroups[name]) return UGroups[name];
		if (UChars[name]) return UChars[name];
		return '';
	};
	return str.replace(/\$([A-Z_]+)/g, resolve);
};


// Simulate PHP's preg_replace with symbolic patterns.
//   fromExp : regular-expression source; $NAME symbols are resolved by regTrans()
//   toExp   : replacement string; $NAME symbols are resolved too, while "$1"-style
//             back-references are left for String.replace to interpret
//   str     : the text to rewrite
// Returns str with every match replaced (the expression is always global).
TextTools.prototype.pregReplace = function(fromExp, toExp, str)
{
	var matcher = new RegExp(this.regTrans(fromExp), 'g');
	var replacement = this.regTrans(toExp);
	return str.replace(matcher, replacement);
};


// Apply a set of rules to a string.
//   rules : collection of [from, to] pairs (an array, or an object whose own
//           properties are such pairs); each pair is passed to pregReplace()
//   str   : the text to rewrite
// Returns the text after all rules were applied one after the other, each rule
// working on the output of the previous one.
TextTools.prototype.applyRules = function(rules, str)
{
	for (var i in rules)
	{
		// for...in also visits inherited enumerable properties (for example
		// methods another script added to Array.prototype); those are not rules.
		if (!Object.prototype.hasOwnProperty.call(rules, i)) continue;
		str = this.pregReplace(rules[i][0], rules[i][1], str);
	}
	return str;
};


//---------------------- Number Functions -----------------------


// Convert the ASCII digits 0-9 in a value to Arabic-Indic digits.
// The argument is converted to a string first; every other character is kept.
TextTools.prototype.arabicNumber = function(str)
{
	var zero = 0x0660; // ARABIC-INDIC DIGIT ZERO
	var toArabic = function(digit)
	{
		return String.fromCharCode(zero + Number(digit));
	};
	return String(str).replace(/([0-9])/g, toArabic);
};

// Convert the ASCII digits 0-9 in a value to Farsi digits
// (the block starting at U+06F0, EXTENDED ARABIC-INDIC DIGIT ZERO).
// The argument is converted to a string first; every other character is kept.
TextTools.prototype.farsiNumber = function(str)
{
	var source = String(str);
	return source.replace(/([0-9])/g, function(digit)
	{
		// offset of the digit from "0", moved into the Farsi digit block
		var offset = digit.charCodeAt(0) - 0x30;
		return String.fromCharCode(0x06F0 + offset);
	});
};

// Return the Arabic name of a number in the range 1-39.
//   num : a number, or a numeric string
// Returns the name, or an empty string when num is not a whole number between
// 1 and 39 (this includes NaN, undefined and fractional values, which used to
// produce "undefined" in the result).
TextTools.prototype.arabicNumberName = function(num)
{
	var unary = ['الاول', 'الثاني', 'الثالث', 'الرابع', 'الخامس', 'السادس', 'السابع', 'الثامن', 'التاسع', 'العاشر'];
	var decimal = ['عشر', 'العشرون', 'الثلاثون'];

	var value = Number(num);
	// Math.floor(NaN) !== NaN, so this also rejects non-numeric input
	if (Math.floor(value) !== value || value < 1 || value >= 40) return '';
	if (value <= 10) return unary[value - 1];

	var digit = value % 10;
	var dec = Math.floor(value / 10);

	// round tens (20, 30) are just the name of the ten
	if (digit === 0) return decimal[dec - 1];

	// in compound numbers "first" takes a different form than the stand-alone word
	var unit = (digit === 1) ? 'الحادي' : unary[digit - 1];
	// 11-19 join with a plain space, 21-39 join with " و"
	var joiner = (dec > 1) ? ' و' : ' ';
	return unit + joiner + decimal[dec - 1];
};


//--------------------- Text Modification --------------------


// Revise a text according to the display options in args.
//   text : the text to revise
//   args : optional object {showSigns, showSmallAlef, ignoreInternalSigns, font};
//          the code also reads args.type (values >= 10 are treated as uthmani text)
// Returns the revised text. All patterns are symbolic and resolved by pregReplace().
TextTools.prototype.fixText = function(text, args)
{
	var opts = args || {};
	var result = text;

	// signs: either strip them all, or wrap them in <span> markup
	if (!opts.showSigns)
	{
		result = this.pregReplace('[$HIGH_SALA-$RUB_EL_HIZB$SAJDAH]', '', result);
	}
	else
	{
		// internal signs are wrapped, or removed when ignoreInternalSigns is set
		var internalMarkup = '<span class="internal-sign">$1</span>';
		if (opts.ignoreInternalSigns)
			internalMarkup = '';

		result = this.pregReplace(' ([$HIGH_SALA-$HIGH_SEEN])', '<span class="sign">&nbsp;$1</span>', result);
		result = this.pregReplace('([$RUB_EL_HIZB$SAJDAH])', internalMarkup, result);
	}

	// the superscript (small) alef is dropped unless explicitly requested
	if (!opts.showSmallAlef)
	{
		result = this.pregReplace('$SUPERSCRIPT_ALEF', '', result);
	}

	// font / text-type specific adjustments; the font check takes precedence
	if (opts.font == 'me_quran')
	{
		result = this.addSpaceTatweel(result);
	}
	else if (opts.type >= 10) // uthmani text
	{
		result = this.addSpaceTatweel(result);
		result = this.removeExtraMeems(result);
		result = this.pregReplace('($LAM$HARAKA*)$HAMZA($HARAKA*)$ALEF', '$1$HAMZA_ABOVE_ALEF$2', result);
		result = this.pregReplace('$SMALL_YEH($HARAKA*$LETTER)', '$TATWEEL$HIGH_YEH$1', result);
		result = this.pregReplace('$ALEF_MAKSURA($SUPERSCRIPT_ALEF$HARAKA*$LETTER)', '$DOTLESS_BEH$1', result);
	}

	// always: compose alef + madda, and move kasra/kasratan in front of a preceding shadda
	result = this.pregReplace('$ALEF$MADDA', '$ALEF_WITH_MADDA_ABOVE', result);
	result = this.pregReplace('$SHADDA([$KASRA$KASRATAN])', '$1$SHADDA', result);
	return result;
};


// Add a tatweel (or a zero-width non-joiner) before the small (superscript) alef.
// Step 1 inserts $TATWEEL between a shadda/fatha and a following superscript alef.
// Step 2 swaps that tatweel for $ZWNJ when the fatha belongs to one of the letters
// in [$HAMZA$DAL-$ZAIN$WAW] (presumably letters that do not join to the left).
TextTools.prototype.addSpaceTatweel = function(text)
{
	var withTatweel = this.pregReplace('($SHADDA|$FATHA)($SUPERSCRIPT_ALEF)', '$1$TATWEEL$2', text);
	return this.pregReplace('([$HAMZA$DAL-$ZAIN$WAW]$SHADDA?$FATHA)$TATWEEL($SUPERSCRIPT_ALEF)', '$1$ZWNJ$2', withTatweel);
};


// Remove extra meems in Uthmani text:
// a $LOW_MEEM that follows fathatan/dammatan and a $HIGH_MEEM that follows
// kasratan are dropped, keeping the tanween mark itself.
TextTools.prototype.removeExtraMeems = function(text)
{
	var withoutLowMeem = this.pregReplace('([$FATHATAN$DAMMATAN])$LOW_MEEM', '$1', text);
	return this.pregReplace('($KASRATAN)$HIGH_MEEM', '$1', withoutLowMeem);
};


// Highlight a pattern in a string.
//   pattern : regular-expression source (used as is, not translated)
//   str     : the text to search
// Returns str with every whitespace-delimited word that contains (part of) a
// match wrapped in <span class="highlight">...</span>; a match spanning several
// words yields one span around all of them.
// The characters "◄" and "►" serve as temporary open/close markers.
TextTools.prototype.highlight = function(pattern, str)
{
	var matcher = new RegExp('('+ pattern+ ')', 'g');
	var marked = str.replace(matcher, '◄$1►');

	// keep whitespace at the edges of a match outside of the markers
	marked = marked.replace(/◄\s/g, ' ◄');
	marked = marked.replace(/\s►/g, '► ');

	// grow each marker outwards to the boundary of the word it sits in
	marked = marked.replace(/([^\s]*)◄/g, '◄$1');
	marked = marked.replace(/►([^\s]*)/g, '$1►');

	// several matches inside one word leave duplicate markers: merge them
	while (/◄[^\s]*◄/.test(marked))
	{
		marked = marked.replace(/(◄[^\s]*)◄/g, '$1');
		marked = marked.replace(/►([^\s]*►)/g, '$1');
	}

	// finally turn the markers into markup
	marked = marked.replace(/◄/g, '<span class="highlight">');
	marked = marked.replace(/►/g, '</span>');
	return marked;
};


//---------------------- Pattern Modification -----------------------


// Enrich an Arabic search pattern into a regular-expression source.
//   pattern      : the search pattern as typed (may contain $NAME letter constants)
//   ignoreHaraka : when truthy, everything matching $HARAKA is removed from the
//                  pattern first
// Returns the enriched pattern. The steps run in a fixed order; each one works
// on the output of the previous one.
TextTools.prototype.enrichPattern = function(pattern, ignoreHaraka) 
{
	var enriched = pattern;

	if (ignoreHaraka)
	{
		enriched = this.pregReplace("$HARAKA", '', enriched);
	}

	// resolve letter constants, then normalise spaces and operators
	enriched = this.regTrans(enriched);
	enriched = this.handleSpaces(enriched);

	// unify letter variants and turn wildcard characters into placeholders
	enriched = this.applyRules(this.preProcess, enriched);
	enriched = this.applyRules(this.wildcardRegs, enriched);

	// allow any number of haraka after every character of the pattern
	enriched = this.pregReplace("(.)", "$1$HARAKA*", enriched);

	// widen letters into their matching sets, then expand the wildcard placeholders
	enriched = this.applyRules(this.matchingRules, enriched);
	enriched = this.applyRules(this.wildcards, enriched);

	return enriched;
};


// Handle spaces in the search pattern.
// Spaces outside double quotes become the "+" operator; underscores and the
// quote characters themselves become plain spaces; redundant operators are
// removed. An empty pattern is returned unchanged.
TextTools.prototype.handleSpaces = function(pattern) 
{
	if (pattern == '') return pattern;

	// collapse every run of whitespace into a single space
	var current = pattern.replace(/\s+/g, ' ');

	// Each pass converts the first space that is not inside a quoted section into
	// "+"; repeat until a pass changes nothing. (current is never empty here, so
	// running the first pass unconditionally matches a pre-checked loop.)
	var before;
	do
	{
		before = current;
		current = current.replace(/^(([^"]*"[^"]*")*)([^"\s]*) /g, '$1$3+');
	}
	while (current != before);

	return current
		.replace(/_/g, ' ')                 // "_" stands for a literal space
		.replace(/"/g, ' ')                 // quotes turn into spaces
		.replace(/^[+|]+/g, '')             // no "+" or "|" at the start
		.replace(/[+|!]+$/g, '')            // no operator at the end
		.replace(/\+*([+|!])\+*/g, '$1');   // drop "+" around another operator
};





