/* Set the MESSAGE_LOGGING flag to 0 (presumably silences HyPhy's message log -- confirm). */
MESSAGE_LOGGING = 0;

/* codonTo3: amino-acid state code (0..20) -> label.
   Labels are one-letter amino-acid symbols; code 10 is labelled "Stop",
   which matches the code that setupMapToTree tests _Genetic_Code against.
   Neither codonTo3 nor codonOffset is read anywhere else in the visible part of this file. */
codonTo3     = {};
codonOffset  = 0;

codonTo3[0]  = "F";
codonTo3[1]  = "L";
codonTo3[2]  = "I";
codonTo3[3]  = "M";
codonTo3[4]  = "V";
codonTo3[5]  = "S";
codonTo3[6]  = "P";
codonTo3[7]  = "T";
codonTo3[8]  = "A";
codonTo3[9]  = "Y";
codonTo3[10] = "Stop";
codonTo3[11] = "H";
codonTo3[12] = "Q";
codonTo3[13] = "N";
codonTo3[14] = "K";
codonTo3[15] = "D";
codonTo3[16] = "E";
codonTo3[17] = "C";
codonTo3[18] = "W";
codonTo3[19] = "R";
codonTo3[20] = "G";

/* Nucleotide alphabet indexed by base-4 digit; used by codeToLetters. */
nucCharacters  = "ACGT";
/* The 20 amino-acid letters in the same order as codonTo3 with the "Stop" entry removed;
   setupMapToTree indexes it to build the two-letter pair labels in pairwiseString. */
characterOrder = "FLIMVSPTAYHQNKDECWRG";

function codeToLetters (codonCode)
{
	/* Spell out a codon index as three nucleotide letters.
	   codonCode is read as a three-digit base-4 number (expected range 0..63),
	   most significant digit first; each digit indexes nucCharacters.
	   Returns the concatenated three-character string. */
	return nucCharacters[codonCode$16] + nucCharacters[(codonCode$4)%4] + nucCharacters[codonCode%4];
}

/*-------------------------------------------------------------------------------------------------------------------------------------------------*/

function setupMapToTree (dummy)
{
	/* Builds the lookup tables that mapSiteToTree consumes.

	   Reads (must already exist): lf, filteredData, givenTree, GeneticCodeExclusions,
	   characterOrder, and _Genetic_Code (expected to be provided by the included
	   chooseGeneticCode.def -- confirm).

	   Writes: ancestralSeqs, filteredDataA, observedCEFV, stateCharCount, _GC_, correctCode,
	   branchNames, seqToBranchMap, dupInfo, dupInfoA, matrixTrick, matrixTrick2,
	   codonInfo, codonInfo2, flatTreeRep, seqStrings, pairwiseMap, pairToCounterMap,
	   oneStepReachable, pairwiseString, synonymousMap.

	   The argument is ignored: it is overwritten immediately and only serves as a
	   scratch variable for the include path. Always returns 0.

	   NOTE(review): the scratch variables assigned here (h, k, v, idx, ...) are also used
	   by the rest of the script, so they are presumably not local to this function. */

	dummy = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
	ExecuteCommands ("#include \""+dummy+"\";");

	/* Ancestral sequences reconstructed from lf, filtered as codons (unit size 3)
	   with the same exclusions list. */
	DataSet			ancestralSeqs  = ReconstructAncestors (lf);
	DataSetFilter	filteredDataA  = CreateFilter(ancestralSeqs,3,"","",GeneticCodeExclusions);

	HarvestFrequencies			  (observedCEFV,filteredData,3,3,0);

	/* Number of codon states left once every codon with genetic-code value 10
	   (the "Stop" code) is removed from the 64. */
	stateCharCount = 64;

	for (h=0; h<64; h=h+1)
	{
		if (_Genetic_Code[h] == 10)
		{
			stateCharCount = stateCharCount -1;
		}
	}
	/* Not read anywhere in this function. */
	ambChoice = 1;

	/* seqToBranchMap is used here only as a temporary for the compressed frequencies;
	   it is re-allocated as the real sequence/branch map further down. */
	seqToBranchMap = {stateCharCount,1};

	hShift = 0;

	/* _GC_[i]        : amino-acid index 0..19 of the i-th retained codon
	                    (codes above 10 are shifted down by one to close the gap left by code 10).
	   correctCode[i] : original 0..63 index of the i-th retained codon. */
	_GC_ = {stateCharCount,1};
	correctCode = {stateCharCount,1};

	for (k=0; k<64; k=k+1)
	{
		if (_Genetic_Code[k]==10)
		{
			hShift = hShift+1;
		}
		else
		{
			seqToBranchMap[k-hShift] = observedCEFV[k];
			_GC_[k-hShift] = _Genetic_Code[k]-(_Genetic_Code[k]>10);
			correctCode[k-hShift] = k;
		}
	}

	observedCEFV = seqToBranchMap;

	branchNames = BranchName (givenTree,-1);
	h = Columns (branchNames);

	seqToBranchMap 	= {h, 2};
	/* maps sequence names to branch order in column 1 
	   and the other way around in column 2 */

	/* Observed sequences occupy rows 0 .. filteredData.species-1;
	   a sequence whose name matches no branch keeps -1 in column 0. */
	for (k=0; k<filteredData.species; k=k+1)
	{
		GetString (seqName, filteredData, k);
		seqToBranchMap[k][0] = -1;
		for (v=0; v<h; v=v+1)
		{
			if (branchNames[v] % seqName)
			{
				seqToBranchMap[k][0] = v;
				seqToBranchMap[v][1] = k;
				break;
			}
		}
	}

	/* Ancestral sequence 0 is tied to the last branch name without a name lookup
	   (mapSiteToTree treats that sequence as the root). */
	seqToBranchMap[filteredData.species][0] = h-1;
	seqToBranchMap[h-1][1] = filteredData.species;


	/* Remaining ancestral sequences are stored after the observed ones,
	   i.e. at row filteredData.species + k. */
	for (k=1; k<filteredDataA.species; k=k+1)
	{
		GetString (seqName, filteredDataA, k);
		seqToBranchMap[filteredData.species+k][0] = -1;
		for (v=0; v<h; v=v+1)
		{
			if (branchNames[v] % seqName)
			{
				seqToBranchMap[k+filteredData.species][0] = v;
				seqToBranchMap[v][1] = k+filteredData.species;
				break;
			}
		}
	}

	/* dupInfo / dupInfoA are indexed by site in mapSiteToTree to obtain the column
	   of codonInfo / codonInfo2 (which have one column per unique site). */
	GetDataInfo    (dupInfo, filteredData);
	GetDataInfo	   (dupInfoA, filteredDataA);

	/* Row vectors used to collapse a per-state column returned by GetDataInfo:
	   matrixTrick  * siteInfo = sum of (state index * entry),
	   matrixTrick2 * siteInfo = sum of the entries.
	   (This set-up used to be executed twice back to back; once is sufficient.) */
	matrixTrick  = {1,stateCharCount};
	matrixTrick2 = {1,stateCharCount};

	for (h=Columns(matrixTrick)-1; h>=0; h=h-1)
	{
		matrixTrick  [h] = h;
		matrixTrick2 [h] = 1;
	}

	codonInfo  = {filteredData.species, filteredData.unique_sites};
	codonInfo2 = {filteredDataA.species, filteredDataA.unique_sites};

	/* Observed data: store the codon state index when the entries sum to exactly 1,
	   otherwise -1 (mapSiteToTree treats -1 as an ambiguity). */
	for (v=0; v<filteredData.unique_sites;v=v+1)
	{
		for (h=0; h<filteredData.species;h=h+1)
		{
			GetDataInfo (siteInfo, filteredData, h, v);
			_SITE_ES_COUNT = matrixTrick2 * siteInfo; 
			if (_SITE_ES_COUNT[0] == 1)
			{
				siteInfo = matrixTrick * siteInfo;
				codonInfo[h][v] = siteInfo[0];
			}
			else
			{
				codonInfo[h][v] = -1;
			}
		}
	}

	/* Ancestral data: stored without the ambiguity check. */
	for (v=0; v<filteredDataA.unique_sites;v=v+1)
	{
		for (h=0; h<filteredDataA.species;h=h+1)
		{
			GetDataInfo (siteInfo, filteredDataA, h, v);
			siteInfo = matrixTrick * siteInfo;
			codonInfo2[h][v] = siteInfo[0];
		}
	}

	/* mapSiteToTree uses flatTreeRep[branch] as the index of that branch's parent. */
	flatTreeRep	  = Abs (givenTree);
	GetInformation (seqStrings, filteredData);
	
	
	
	/* 190 = 20*19/2 unordered pairs of distinct amino acids.
	   pairwiseMap[pair][site]   : substitution counts, filled by mapSiteToTree
	   pairToCounterMap[a][b]    : symmetric (a,b) -> pair row index
	   oneStepReachable[pair]    : 1 if some pair of retained codons differing at exactly
	                               one nucleotide position encodes the two amino acids */
	pairwiseMap       = {190,filteredDataA.sites};
	pairToCounterMap  = {20,20};
	oneStepReachable  = {190,1};
	
	idx = 0;
	
	/* ";"-prefixed list of two-letter pair labels, in pair-index order. */
	pairwiseString = "";
	
	for (h=0; h<20; h=h+1)
	{
		for (v=h+1; v<20; v=v+1)
		{
			pairToCounterMap[h][v] = idx;
			pairToCounterMap[v][h] = idx;
			pairwiseString = pairwiseString + ";" + characterOrder[h] + characterOrder[v];
			idx = idx+1;
		}
	}
	
	/* NOTE(review): hshift, vshift and nucPosInCodon are written below but never read
	   in this function; they are kept in case other code reads them. */
	hshift = 0;
	for (h=0; h<64; h=h+1)
	{
		if (_Genetic_Code[h]==10) 
		{
			hshift = hshift+1;
			continue; 
		}
		
		vshift = hshift;
		for (v = h+1; v<64; v=v+1)
		{
			diff = v-h;
			if (_Genetic_Code[v]==10) 
			{
				vshift = vshift+1;
				continue; 
			}
			nucPosInCodon = 2;
			/* Codons h < v differ at exactly one position when
			     - they share the first two nucleotides (h$4 == v$4), or
			     - they share the first and third (same h$16, difference multiple of 4), or
			     - they share the last two (difference multiple of 16). */
			if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0))
			{
				aa1 = _Genetic_Code[h];
				aa2 = _Genetic_Code[v];
				
				if (aa1!=aa2) 
				{
					aa1 = aa1-(aa1>10);
					aa2 = aa2-(aa2>10);
					aa1 = pairToCounterMap[aa1][aa2];
					oneStepReachable [aa1] = 1;
				}
			}
		}
	}

	/* synonymousMap[aa][site]: synonymous substitution counts, filled by mapSiteToTree. */
	synonymousMap     = {20,filteredDataA.sites};		
	return 0;
}

/*-------------------------------------------------------------------------------------------------------------------------------------------------*/

function mapSiteToTree (site)
{
	/* Adds the substitutions found at one codon site to the running tallies.

	   Every non-root node is compared with its parent's reconstructed codon:
	     - same amino acid, different codon -> synonymousMap[aa][site] is incremented
	     - different amino acids            -> pairwiseMap[pairIndex][site] is incremented
	   Relies on the tables built by setupMapToTree. Always returns 0.

	   site : 0-based site index (used to index dupInfo/dupInfoA and as the column
	          of synonymousMap/pairwiseMap). */

	/* Reset on every call. The two countSubstitutions* containers are not used
	   anywhere else in this function. */
	TREE_OUTPUT_OPTIONS = {};
	k = filteredData.species+1;
	
	countSubstitutionsInt  = {};
	countSubstitutionsLeaf = {};
	
	/* first sequence is always the root */
	/* c1: column of codonInfo2 holding this site's pattern.
	   k walks seqToBranchMap in step with h, starting at the row of ancestral sequence 1. */
	c1 = dupInfoA[site];
	for (h=1; h<filteredDataA.species; h=h+1)
	{
		/* p1 = branch index of this node, pid = its parent's branch index,
		   p2 = row of the parent in codonInfo2. */
		p1  = seqToBranchMap[k][0];
		pid = flatTreeRep[p1];
		p2  = seqToBranchMap[pid][1]-filteredData.species;
		
		cd1 = codonInfo2[h] [c1];
		cd2 = codonInfo2[p2][c1];
				
		aa1 = _GC_[cd1];
		aa2 = _GC_[cd2];
		
		if (cd1 != cd2)
		{
			if (aa1 == aa2)
			{
				synonymousMap[aa1][site] = synonymousMap[aa1][site] + 1;
			}
			else
			{
				idx		    = pairToCounterMap[aa1][aa2];
				pairwiseMap [idx][site] = pairwiseMap [idx][site] + 1;
			}
		}
		

		/* NOTE(review): this value is overwritten at the top of the next iteration
		   and is not read in between -- looks like a leftover. */
		cd1 = branchNames[p1];
		
		k=k+1;
	}
	
	/* now do the leaves */
	
	/* c2: column of codonInfo holding this site's pattern for the observed sequences. */
	c2 = dupInfo[site];
	for (h=0; h<filteredData.species; h=h+1)
	{
		p1  = seqToBranchMap[h][0];
		pid = flatTreeRep[p1];
		p2  = seqToBranchMap[pid][1]-filteredData.species;
		cd2 = codonInfo2[p2][c1];
		cd1 = codonInfo [h][c2];
		
		if (cd1>=0)
		/* no ambiguities */
		{
			aa1 = _GC_[cd1];
			aa2 = _GC_[cd2];			
			if (cd1 != cd2)
			{
				if (aa1 == aa2)
				{
					synonymousMap[aa1][site] = synonymousMap[aa1][site] + 1;
				}
				else
				{
					idx		    = pairToCounterMap[aa1][aa2];
					pairwiseMap [idx][site] = pairwiseMap [idx][site] + 1;
				}
			}

			/* NOTE(review): nodeSpec is not defined in the visible part of this file;
			   confirm where it comes from and whether this entry is still needed. */
			cd1 = branchNames[p1];
			TREE_OUTPUT_OPTIONS[cd1] = nodeSpec;
		}	
		else
		/* ambiguities here */
		{
			/* seqString is assigned but not used further in this function. */
			seqString = seqStrings[h];
			GetDataInfo    (ambInfo, filteredData, h, c2);	
			
			/* corrector = sum of the entries of ambInfo
			   (presumably the number of codon states compatible with the ambiguity). */
			corrector = Transpose (ambInfo["1"]) * ambInfo;
			/* A character for which the sum reaches stateCharCount is skipped. */
			if (corrector[0] < stateCharCount)
			{
				/* Each flagged state contributes 1/(sum) instead of a whole count. */
				corrector = 1/corrector[0];
				aa2 	  = _GC_[cd2];			

				/* k is reused as the state index; the internal-node loop above has finished with it. */
				for (k=0; k<stateCharCount; k=k+1)
				{
					if (ambInfo[k])
					{
						aa1  = _GC_[k];
						if (k != cd2)
						{
							if (aa1 == aa2)
							{
								synonymousMap[aa1][site] = synonymousMap[aa1][site] + corrector;
							}
							else
							{
								idx		    			 = pairToCounterMap[aa1][aa2];
								pairwiseMap [idx][site]  = pairwiseMap [idx][site] + corrector;
							}
						}
					}
				}	
			}
			/* Not read afterwards within this function. */
			cd1 = branchNames[p1];
		}
	}
		
	return 0;
}

/*-------------------------------------------------------------------------------------------------------------------------------------------------*/

function storeProfile (recp&, string, value)
{
	/* Assign one value to every amino acid named in a string of letters.
	     recp&  : name of the target container, passed by reference
	     string : amino-acid letters; each is translated to an index via orderMap
	     value  : stored at each translated index
	   Uses h (position in the string) and v (translated index) as scratch variables.
	   Always returns 0. */
	h = 0;
	while (h < Abs(string))
	{
		v = orderMap[string[h]];
		recp[v] = value;
		h = h+1;
	}
	return 0;
}

/*-------------------------------------------------------------------------------------------------------------------------------------------------*/

/* Amino-acid letters in index order, and the inverse lookup letter -> index (0..19). */
order    = "FLIMVSPTAYHQNKDECWRG";
orderMap = {};

h = 0;
while (h < 20)
{
	v = order[h];
	orderMap[v] = h;
	h = h+1;
}

/* stanfel[i] holds the class label (1..4) of amino acid i;
   the four storeProfile calls below assign the four letter groups. */
stanfel = {20,1};

storeProfile ("stanfel","ACGILMPSTV",1);
storeProfile ("stanfel","DENQ",2);
storeProfile ("stanfel","FWY",3);
storeProfile ("stanfel","HKR",4);

/* Build all lookup tables (the argument is ignored by setupMapToTree). */
setupMapToTree(0);

/* One label per site plus one extra trailing slot. */
labels         = {1,filteredData.sites+1};

/* Tally substitutions site by site into pairwiseMap / synonymousMap. */
for (site_enumerator = 0; site_enumerator < filteredData.sites; site_enumerator = site_enumerator + 1)
{
	mapSiteToTree (site_enumerator);
	labels [site_enumerator] = "Site " + (site_enumerator+1);
}

/* After the loop site_enumerator equals filteredData.sites, so this fills the trailing slot
   with the ";"-separated pair names (presumably the chart's row labels -- confirm). */
labels[site_enumerator] = pairwiseString;

/* Open a chart window titled "Pairwise counts by site" over the variables named
   "labels" and "pairwiseMap". The remaining entries are positional display settings
   whose meaning is not evident from this file. */
OpenWindow (CHARTWINDOW,{{"Pairwise counts by site"}
		{"labels"}
		{"pairwiseMap"}
		{"None"}
		{""}
		{""}
		{""}
		{""}
		{""}
		{"0"}
		{""}
		{"-1;-1"}
		{"10;1.309;0.785398"}
		{"Times:12:0;Times:10:0;Times:12:2"}
		{"0;0;16777215;0;0;0;6579300;11842740;13158600;14474460;0;3947580;16777215;16711680;6845928;16771158;2984993;9199669;7018159;1460610;16748822;11184810;14173291"}
		{"16,0,0"}
		},
		"749;663;200;200");

/* Column headers for the totals table; the trailing entry holds the pair names
   (presumably the chart's row labels -- confirm). */
labels = {{"Count","One Step Reachable","Stanfel Change",pairwiseString}};

/* Column vector of ones: multiplying by it sums each pair's counts over all sites. */
h = {filteredData.sites,1};
h = h["1"];

pairwiseTotal     = pairwiseMap * h;
h = Rows(pairwiseMap);
/* Per pair: [0] total count, [1] one-step reachable flag, [2] Stanfel class change flag. */
pairwiseTotalPlus = {h,3};

for (h=0; h<20; h=h+1)
{
	for (v=h+1; v<20; v=v+1)
	{
		h2 = pairToCounterMap[h][v];
		/* 1 when the two amino acids belong to DIFFERENT Stanfel classes, i.e. the
		   substitution changes class. This used to test ==, which flagged the
		   within-class pairs instead and so inverted both this column and the
		   conservative/radical totals computed below. */
		pairwiseTotalPlus[h2][2] = (stanfel[h] != stanfel[v]);
	}
}

/* h was consumed as a loop index above; restore the number of pairs. */
h = Rows(pairwiseMap);

consCount = 0;
radCount  = 0;

for (v = 0; v < h; v = v + 1)
{
	pairwiseTotalPlus[v][0] = pairwiseTotal[v];
	pairwiseTotalPlus[v][1] = oneStepReachable[v];
	/* Class-changing pairs are radical; within-class pairs are conservative. */
	if (pairwiseTotalPlus[v][2])
	{
		radCount  = radCount + pairwiseTotal[v];
	}
	else
	{
		consCount = consCount + pairwiseTotal[v];
	}	
	
}

/* Grand total of synonymous substitutions: sum over amino acids (left product with a
   row of ones), then over sites (right product with a column of ones). */
synCount = {1,20};
synCount = synCount["1"]*synonymousMap;
synCount = synCount*Transpose(synCount["1"]);

fprintf (stdout, "\n", synCount[0], " synonymous substitutions\n", consCount, " conservative substitutions\n", radCount, " radical substitutions\n");	
/* Chart window over "labels" and "pairwiseTotalPlus"; the remaining entries are
   positional display settings whose meaning is not evident from this file. */
OpenWindow (CHARTWINDOW,{{"Pairwise counts totals"}
		{"labels"}
		{"pairwiseTotalPlus"}
		{"None"}
		{""}
		{""}
		{""}
		{""}
		{""}
		{"0"}
		{""}
		{"-1;-1"}
		{"10;1.309;0.785398"}
		{"Times:12:0;Times:10:0;Times:12:2"}
		{"0;0;16777215;0;0;0;6579300;11842740;13158600;14474460;0;3947580;16777215;16711680;6845928;16771158;2984993;9199669;7018159;1460610;16748822;11184810;14173291"}
		{"16,0,0"}
		},
		"749;663;70;70");		
