#include "StdAfx.h"
#include "Cluster.h"

#include <math.h>
#include <stdio.h>
#include <limits.h>
#include <float.h>

/* CEM = conditional expectation maximisation */

/*
This code is based on 
KlustaKwik
----------
KlustaKwik version 1.6
----------------------

KlustaKwik is a program for unsupervised classification of multidimensional
continuous data. It arose from a specific need - automatic sorting of neuronal
action potential waveforms (see KD Harris et al, Journal of Neurophysiology
84:401-414,2000), but works for any type of data.  We needed a program that
would:

1) Fit a mixture of Gaussians with unconstrained covariance matrices
2) Automatically choose the number of mixture components
3) Be robust against noise
4) Reduce the problem of local minima
5) Run fast on large data sets (up to 100000 points, 48 dimensions)

Speed in particular was essential.  KlustaKwik is based on the CEM algorithm of
Celeux and Govaert (which is faster than the standard EM algorithm), and also
uses several tricks to improve execution speed while maintaining good
performance.  On our data, it runs at least 10 times faster than Autoclass.


Cluster splitting and deletion
------------------------------

The main improvement in version 1.6 is the ability to specify Bayesian information content or AIC information content 
as the penalty for a larger number of clusters or a mixture of these two.  
KlustaKwik allows for a variable number of clusters to be fit. 
The program periodically checks if splitting any cluster would improve the overall score.  
It also checks to see if deleting any cluster and reallocating its points would improve overall score.  
The splitting and deletion features allow the program to often escape from local minima, 
reducing sensitivity to the initial number of clusters, and reducing the total number of starts needed for a data set.


Binaries
--------
The current distribution contains pre-compiled binaries for Linux Pentium, Mac OS X, and Windows. 
These are located in separate subdirectories.


Compilation
-----------

The program is written in C++.  To compile under unix, extract all files to a
single directory and type make.  That should be all you need to do.  If it
doesn't work, change the makefile to replace g++ with the name of your C++
compiler. A Metrowerks Codewarrior project is included for compilation on 
the Mac or Windows.

To check it compiled properly type "KlustaKwik test 1 -MinClusters 2" to run
the program on the supplied test file. The outputs, test.clu.1 and test.model.1,
should match the supplied test_res.clu.1 and test_res.model.1 files (excepting
line endings, which may vary by platform).


Usage
-----

The program takes a "feature file" as input, and produces two output files, the
"cluster file", and a log file.  The file formats and conventions may seem
slightly strange.  This is for historical reasons.  If you want to change the
code, go ahead, this is open source software.

The feature file should have a name like FILE.fet.n, where FILE is any string,
and n is a number.  The program is invoked by running "KlustaKwik FILE n", and
will create a cluster file FILE.clu.n and a log file FILE.klg.n.  The number n
doesn't serve any purpose other than to let you have several files with the
same file base.

The first line of the feature file should be the number of input dimensions. 
The following lines are the data, with each line being one data instance,
consisting of a list of numbers separated by spaces.  An example file test.fet.1 is provided. 
Please note that the features can be the sample values of a putative waveform event.

The first line of the cluster file will be the number of classes that the
program chose.  The following lines will be the classes assigned to the data
points.  Class 1 is a "noise cluster" modelled by a uniform distribution, which
should contain outliers, if there are any.


Parameters
----------

It is possible to pass the program parameters by running "KlustaKwik FILE n
params" etc.  All parameters have default values. Here are the parameters you can use:

-ChangedThresh f (default 0.05)
All log-likelihoods are recalculated if the fraction of instances changing class exceeds f (see DistThresh)

-Debug n         (default 0)
Miscellaneous debugging information (not recommended)

-DistDump        (default 0)
Outputs a ridiculous amount of debugging information (definitely not recommended).

-DistThresh d    (default 6.907755 = ln(1000) )
Time-saving parameter.  If a point has log likelihood more than d worse for a
given class than for the best class, the log likelihood for that class is not
recalculated.  This saves an awful lot of time.

-fSaveModel n    (default 1)
If n is non-zero, save final model to .model file.

-FullStepEvery n (default 10)
All log-likelihoods are recalculated every n steps (see DistThresh)

-help
Prints a short message and then the default parameter values.

-Log n           (default 1)
Produces .klg log file if non-zero. (to switch off use -Log 0)

-MaxClusters n   (default 10)
The random initial assignment will have no more than n clusters. 

-MaxIter n       (default 500)
Don't try more than n iterations from any starting point.

-MaxPossibleClusters n   (default 100)
Cluster splitting can produce no more than n clusters.

-MinClusters n   (default 2)
The random initial assignment will have no less than n clusters.  The final
number may be different, since clusters can be split or deleted during the
course of the algorithm.

-PenaltyMix d    (default 0.0)
Amount of BIC to use as a penalty for more clusters. Default of 0 sets to use all AIC. 
Use 1.0 to use all BIC (this generally produces fewer clusters).
(Note: in this file CCluster::s_PenaltyMix is initialised to 1.0, i.e. all BIC.)

-nStarts n       (default 1)
The algorithm will be started n times for each initial cluster count between
MinClusters and MaxClusters.

-RandomSeed n    (default 1)
Specifies a seed for the random number generator

-Screen n        (default 1)
Produces parameters and progress information on the console. Set to 0 to suppress output in batches.

-SplitEvery n    (default 50)
Test to see if any clusters should be split every n steps. 0 means don't split.

-StartCluFile STRING   (default "")
Treats the specified cluster file as a "gold standard".  If it can't find a
better cluster assignment, it will output this.

-UseFeatures STRING   (default 11111111111100001)
Specifies a subset of the input features to use.  STRING should consist of 1s
and 0s with a 1 indicating to use the feature and a 0 to leave it out, 
or the string 'ALL' indicating that every column in the feature file should be used.  
NB The default value for this parameter is 11111111111100001 (because this is what we use in the lab).

-Verbose n   (default 1)
Provide more diagnostic output if non-zero.


Contact Information
-------------------

This program is copyright Ken Harris (harris@axon.rutgers.edu), 2000-2003. It
is distributed under the GNU General Public License (www.gnu.org) at http:klustakwik.sourceforge.net. 
If you make any changes or improvements, please let me know.

*/

/*
An instance of CCluster can be embedded in another class. Its constructor
sets sensible defaults. Call DoPartition(ppDat, nItems, nDims, pClassID,
LogFileName, BestScore) to do a cluster analysis.
*/

// Tunable parameters shared by every CCluster instance.
// Class index 0 is the noise class in this file; cluster counts below include it.
int CCluster::s_MinClusters=2; // smallest starting cluster count tried by DoPartition()
int CCluster::s_MaxClusters=10; // largest starting cluster count tried by DoPartition() (capped at s_MaxPossibleClusters)
int CCluster::s_MaxPossibleClusters=100; // per-class array dimension; splitting can't make the count exceed this
// s_NumStarts not exposed; always just 1
int CCluster::s_NumStarts=1; // number of CEM runs DoPartition() performs for each starting cluster count
int CCluster::s_RandomSeed=1; // NOTE(review): not referenced anywhere in this file - confirm where (if at all) rand() is seeded
int CCluster::s_DistDump=0; // NOTE(review): not referenced anywhere in this file
float CCluster::s_DistThresh=(float)log(1000.f); // On a quick (non-full) step EStep() skips a point/class pair whose
			// score was more than this much worse than the point's current class - and that's most of them
int CCluster::s_FullStepEvery=10;		// CEM() forces a full E-step whenever the iteration count is a multiple of this
float CCluster::s_ChangedThresh=0.05f;	// ... or when more than this fraction of points changed class last time
int CCluster::s_MaxIter=500; // CEM() gives up once its iteration count exceeds this
//extern char StartCluFile[];
int CCluster::s_SplitEvery=50; // CEM() tries cluster splits every this many iterations; <=0 disables splitting
float CCluster::s_PenaltyMix=1.f;		// weight of BIC in Penalty(): 1 = all BIC, 0 = all AIC; must be between 0 and 1

// Double-precision pi, used for the Gaussian normalisation term in EStep().
const double PI= 3.14159265358979323846;


// Default constructor: no data attached yet, logging flag switched on.
// Every working-array pointer starts out NULL so DelWorkingArrays() can be
// called safely before anything has been allocated.
CCluster::CCluster(void)
{
	// float arrays
	m_pData = NULL;
	m_pWeight = NULL;
	m_pMean = NULL;
	m_pCov = NULL;
	m_pLogP = NULL;

	// int arrays
	m_pClass = NULL;
	m_pOldClass = NULL;
	m_pClass2 = NULL;
	m_pBestClass = NULL;
	m_pAliveIndex = NULL;

	// bool array
	m_pbClassAlive = NULL;

	m_Log = 1;
	m_ItemCount = 0;
	m_DimCount = 0;
}
// Construct a clusterer over a private copy of another clusterer's data.
// Copies the dimensions, the log flag and the m_pData values; all other
// working arrays are freshly allocated (by AllocateArrays) and not copied.
CCluster::CCluster(CCluster *pSource)
{
	// start from a clean slate so AllocateArrays() owns every pointer
	m_pbClassAlive=NULL;
	m_pClass=m_pOldClass=m_pClass2=m_pBestClass=m_pAliveIndex=NULL;
	m_pData=m_pWeight=m_pMean=m_pCov=m_pLogP=NULL;

	m_Log = pSource->m_Log;
	m_ItemCount = pSource->m_ItemCount;
	m_DimCount = pSource->m_DimCount;

	AllocateArrays();

	// duplicate the item-by-dimension data table
	const float *pFrom = pSource->m_pData;
	float *pTo = m_pData;
	const int nValues = m_DimCount*m_ItemCount;
	for (int k=0; k<nValues; k++)
		pTo[k] = pFrom[k];
}

// Destructor: releases whatever working arrays are still allocated.
CCluster::~CCluster(void)
{
	// DelWorkingArrays() tolerates pointers that are still NULL
	this->DelWorkingArrays();
}

// Rebuild m_pAliveIndex (the compact list of live class numbers) and
// m_nClustersAlive from the m_pbClassAlive flags.  Slot 0 is always the
// noise class 0, whatever its flag says.
void CCluster::Reindex() 
{
	int nAlive = 0;

	m_pAliveIndex[nAlive++] = 0;	// noise class is unconditionally alive

	for (int cls=1; cls<s_MaxPossibleClusters; cls++)
	{
		if (!m_pbClassAlive[cls])
			continue;
		m_pAliveIndex[nAlive++] = cls;
	}

	m_nClustersAlive = nAlive;
}

// ComputeScore() - total score of the current classification: the complexity
// penalty for the number of live clusters plus, for every item, the m_pLogP
// entry of the class it is currently assigned to.  Lower is better.
// Requires the M, E and C steps to have been run so m_pLogP and m_pClass agree.
double CCluster::ComputeScore() 
{
	double Total = Penalty(m_nClustersAlive);

	// walk m_pLogP one item (row of s_MaxPossibleClusters entries) at a time
	const float *pRow = m_pLogP;
	for (int item=0; item<m_ItemCount; item++, pRow+=s_MaxPossibleClusters)
		Total += pRow[m_pClass[item]];

#if 0
	if (Debug) {
		int c, cc;
		float tScore;
		for(cc=0; cc<nClustersAlive; cc++) {
			c = AliveIndex[cc];
			tScore = 0;
				// possible bug:  "int" added to next statement
			for(int p=0; p<nPoints; p++) if(Class[p]==c) tScore += LogP[p*MaxPossibleClusters + Class[p]];
			Output("class %d has subscore %f\n", c, tScore);
		}
	}
#endif

	return Total;
}


// Penalty(n) - complexity penalty for a model with n live classes.
// The noise class (class 0) contributes no free parameters, so n==1 costs
// nothing.  Every other class has a covariance matrix (upper triangle), a
// mean vector and a weight.  The result blends the AIC penalty (2 per
// parameter) and the BIC penalty (log(item count)/2 per parameter) according
// to s_PenaltyMix (0 = all AIC, 1 = all BIC).
double CCluster::Penalty(int n) 
{
	if (n==1)
		return 0.;

	// free parameters per Gaussian class: cov + mean + weight
	const int nPerCluster = m_DimCount*(m_DimCount+1)/2 + m_DimCount + 1;
	const int nParams = nPerCluster*(n-1);

	// Pure AIC would be nParams*2; pure BIC nParams*log(nPoints)/2.
	const double aicTerm = (1.0 - s_PenaltyMix) * nParams * 2;
	const double bicTerm = s_PenaltyMix * (nParams * log((float)m_ItemCount)/2);

	return aicTerm + bicTerm;
}

// CEM() - run one classification-EM fit starting from m_nStartingClusters
// classes (the count includes the noise class 0).
//
// Each iteration runs MStep(), EStep() and CStep(), then scores the result.
// Iteration stops when a full step changed no assignment and no split was
// made, or when the iteration count exceeds s_MaxIter.
//
// bRecurse: true for a top-level fit - ConsiderDeletion() runs every
//           iteration and TrySplits() runs periodically.  false for the
//           sub-fits made by TrySplits(): no deletion, no splitting, and
//           each log line is prefixed with a tab.
// Returns the score of the last iteration (lower is better).
double CCluster::CEM(bool bRecurse)
{
	int p, c;
	int nChanged;
	int nIter;
	int *pOldClass=new int[m_ItemCount];
	float Score=0.f;
	bool bLastStepFull; // stores whether the last step was a full one
	bool bDidSplit=false;
	
	
	// Initial assignment.  With FIXED_OP_FOR_COMP set the assignment is a
	// deterministic round-robin; otherwise each item gets a random class in
	// 1..m_nStartingClusters-1, i.e. never the noise class.
	if (m_nStartingClusters>1)
	{
		for(p=0; p<m_ItemCount; p++) 
#if FIXED_OP_FOR_COMP
			m_pClass[p] = p%m_nStartingClusters;
#else
			m_pClass[p] = irand(1, m_nStartingClusters-1);
#endif
	}
	else
	{
		for(p=0; p<m_ItemCount; p++) 
			m_pClass[p] = 0;
	}

	// the first m_nStartingClusters classes start alive, the rest dead
	for(c=0; c<s_MaxPossibleClusters; c++) 
		m_pbClassAlive[c] = (c<m_nStartingClusters);

	Reindex();

	// main loop
	nIter = 0;
	m_bFullStep = true;
	do {
		// Store old classifications
		for(p=0; p<m_ItemCount; p++) 
			pOldClass[p] = m_pClass[p];
		
		// M-step - calculate class weights, means, and covariance matrices for each class
		MStep();
		
		// E-step - calculate scores for each point to belong to each class
		EStep();

		// dump distances if required
		
		// C-step - choose best class for each 
		CStep(); 
		
		// Would deleting any classes improve things?
		if (bRecurse)
			ConsiderDeletion();
		
		// Calculate number changed
		nChanged = 0;
		for(p=0; p<m_ItemCount; p++)
			if (pOldClass[p] != m_pClass[p])
				nChanged++;
		
		// Calculate score
		Score = ComputeScore();

#if 0
			// save cluster centers for later output, but
			// only if not just horsing around with testing
			// splits.
		if (bRecurse && (Score < kSv.BestScoreSave)) 
		{
			SaveBestMeans();
				// this might be better than the score
				// returned at the end of the iteration
			kSv.BestScoreSave = Score;
		}
#endif



        if(bRecurse==0) 
			Output("\t");
        Output("Iteration %d%c: %d clusters Score %.7g nChanged %d\n",
		nIter, m_bFullStep ? 'F' : 'Q', m_nClustersAlive, Score, nChanged);

		nIter++;

#if 0
		if (Debug) {
			for(p=0;p<m_ItemCount;p++) BestClass[p] = Class[p];
			SaveOutput();
			Output("Press return");
			getchar();
		}
#endif
		
		// Next step a full step?
		// s_FullStepEvery==0 is treated as "no periodic full step" rather
		// than being used as a divisor (x % 0 is undefined behaviour).
		bLastStepFull = m_bFullStep;
		m_bFullStep = (
						nChanged>s_ChangedThresh*m_ItemCount
						|| nChanged == 0	// not sure why this is here
						|| (s_FullStepEvery!=0 && nIter%s_FullStepEvery==0)
					//	|| Score > OldScore Doesn't help!  
					//	Score decreases are not because of quick steps!
					) ;
		if (nIter>s_MaxIter) 
		{
			Output("Maximum iterations exceeded\n");
			break;
		}		

        // try splitting: periodically, or when a full step has converged
		// fix:  first "&&" changed from "&"
        if (bRecurse && s_SplitEvery>0 && (nIter%s_SplitEvery==s_SplitEvery-1 || (nChanged==0 && bLastStepFull))) 
		{

#ifdef _DEBUG
    CMemoryState oldMemState, newMemState, diffMemState;
    oldMemState.Checkpoint();
#endif


            bDidSplit = TrySplits();

#ifdef _DEBUG
    newMemState.Checkpoint();
    if( diffMemState.Difference( oldMemState, newMemState ) )
    {
        TRACE( "Memory leaked!\n" );
    }
#endif

		} 
		else 
			bDidSplit = false;

	// keep going while assignments are still moving, while the last step was
	// only a quick one, or while a split has just changed the model
	} while (nChanged > 0 || !bLastStepFull || bDidSplit);

//	if (DistDump) fprintf(Distfp, "\n");

	delete [] pOldClass;
	return Score;
}


// TrySplits() - for each live non-noise cluster, test whether splitting it in
// two improves the score, and commit the split if it does.
//
// For each cluster the member points are copied into a scratch clusterer
// (C2), which is fitted once with a single cluster and once with two.  If two
// fit better, the split is applied to a full-size scratch copy (C3) and
// accepted only if the total score over all data drops.
//
// Returns true if at least one split was committed to this object's
// m_pClass / m_pbClassAlive, including when the function stops early after an
// earlier split, so the caller knows the model changed and must be refitted.
bool CCluster::TrySplits() 
{
    if(m_nClustersAlive>=s_MaxPossibleClusters-1) 
	{
		Output("Won't try splitting - already at maximum number of clusters\n");
        return false;
    }

    int c, cc, c2, p, p2, d;
	bool bDidSplit = false;
    float Score, NewScore, UnsplitScore, SplitScore;
    int UnusedCluster;
    CCluster C3(this); // third one for comparison


    // NOTE(review): Score is not refreshed after an accepted split, so later
    // candidates are compared against the pre-split total - confirm intended.
    Score = ComputeScore();


    // loop thru clusters, trying to split
    for (cc=1; cc<m_nClustersAlive; cc++) 
	{
        c = m_pAliveIndex[cc];

        // set up C2 structure to contain points of this cluster only

	    CCluster C2; // second KK structure for sub-clustering
        // count number of points and allocate memory
        C2.m_ItemCount= 0;
        for(p=0; p<m_ItemCount; p++) 
			if(m_pClass[p]==c) 
				C2.m_ItemCount++;
        if(C2.m_ItemCount==0) 
			continue;
        C2.m_DimCount= m_DimCount;
        C2.AllocateArrays();	// also sets nDims2
        C2.m_NoisePoint = 0;	// no pseudo-point for the noise class in the sub-fit

        // put data into C2
        p2=0;
        for(p=0; p<m_ItemCount; p++) 
			if(m_pClass[p]==c) 
			{
	            for(d=0; d<m_DimCount; d++) 
					C2.m_pData[p2*m_DimCount + d] = m_pData[p*m_DimCount + d];
				p2++;
			}

        // find an unused cluster number to receive the second half
        UnusedCluster = -1;
        for(c2=1; c2<s_MaxPossibleClusters; c2++) 
		{
             if (!m_pbClassAlive[c2]) 
			 {
                 UnusedCluster = c2;
                 break;
             }
        }
        if (UnusedCluster==-1) 
		{
			Output("No free clusters, abandoning split\n");
            return bDidSplit;
        }

        // do it
		Output("Trying to split cluster %d (%d points) \n", c, C2.m_ItemCount);
        C2.m_nStartingClusters=2; // (2 = 1 clusters + 1 unused noise cluster)
        UnsplitScore = C2.CEM(false);
        C2.m_nStartingClusters=3; // (3 = 2 clusters + 1 unused noise cluster)
        SplitScore = C2.CEM(false);

		Output("Unsplit score = %f, split score = %f\n",UnsplitScore,SplitScore);

        if(SplitScore<UnsplitScore) 
		{
            // will splitting improve the score in the whole data set?

            // assign clusters to K3
            for(c2=0; c2<s_MaxPossibleClusters; c2++) 
				C3.m_pbClassAlive[c2]=false;
            p2 = 0;
            for(p=0; p<m_ItemCount; p++) 
			{
                if(m_pClass[p]==c) 
				{
                    // sub-fit class 1 keeps the old number, class 2 gets the new one
                    if(C2.m_pClass[p2]==1) 
						C3.m_pClass[p] = c;
                    else if(C2.m_pClass[p2]==2) 
						C3.m_pClass[p] = UnusedCluster;
                    else 
					{
						TRACE("split should only produce 2 clusters; got cluster %d\n",C2.m_pClass[p2]);
						ASSERT(FALSE);
#ifdef _DEBUG
						m_nSplitError++;
#endif
						// Report any split already committed in this call;
						// returning false here would hide it from the caller.
						return bDidSplit;
					}
                    p2++;
                } 
				else 
					C3.m_pClass[p] = m_pClass[p];
                C3.m_pbClassAlive[C3.m_pClass[p]] = true;
            }
            C3.Reindex();

            // compute scores
            C3.MStep();
            C3.EStep();
            NewScore = C3.ComputeScore();
			Output("Splitting cluster %d changes total score from %f to %f\n", c, Score, NewScore);

            if (NewScore<Score) {
                bDidSplit = true;
                Output("So it's getting split into cluster %d.\n", UnusedCluster);

                // so put clusters from K3 back into main KK struct (K1)
                for(c2=0; c2<s_MaxPossibleClusters; c2++) 
					m_pbClassAlive[c2] = C3.m_pbClassAlive[c2];
                for(p=0; p<m_ItemCount; p++) 
					m_pClass[p] = C3.m_pClass[p];
            } 
			else 
			{
                Output("So it's not getting split.\n");
            }
        }
    }
    return bDidSplit;
}




// M-step: from the current assignment in m_pClass, recompute the weight,
// mean and covariance (upper triangle only) of every living class.
// Any non-noise class with no more members than there are dimensions is
// marked dead first.
void CCluster::MStep()
{
	const int nDim = m_DimCount;
	int item, cls, slot, row, col;

	int *pCount=new int[s_MaxPossibleClusters];	// members per class
	float *pDelta=new float[nDim];				// data point minus class mean

	// Reset the per-class accumulators.  The whole covariance block is
	// zeroed (not just the upper triangle that is used) so that nothing is
	// left uninitialised.
	for (cls=0; cls<s_MaxPossibleClusters; cls++)
	{
		pCount[cls] = 0;

		float *pMeanRow = m_pMean + cls*nDim;
		for (col=0; col<nDim; col++)
			pMeanRow[col] = 0;

		float *pCovBlock = m_pCov + cls*m_DimCount2;
		for (int k=0; k<nDim*nDim; k++)
			pCovBlock[k] = 0;
	}

	// How many points does each class hold?
	for (item=0; item<m_ItemCount; item++)
		++pCount[m_pClass[item]];

	// Kill under-populated classes (the noise class is exempt)
	for (slot=0; slot<m_nClustersAlive; slot++)
	{
		cls = m_pAliveIndex[slot];
		if (cls<=0 || pCount[cls]>m_DimCount)
			continue;
		m_pbClassAlive[cls]=false;
		Output("Deleted class %d: not enough members\n", cls);
	}
	Reindex();

	// Class weight = share of the points.  The noise class gets m_NoisePoint
	// added to its count so that its weight never reaches zero.
	for (slot=0; slot<m_nClustersAlive; slot++)
	{
		cls = m_pAliveIndex[slot];
		m_pWeight[cls] = (cls==0)
			? ((float)pCount[cls]+m_NoisePoint) / (m_ItemCount+m_NoisePoint)
			: ((float)pCount[cls]) / (m_ItemCount+m_NoisePoint);
	}
	Reindex();

	// Means: sum the members of each class ...
	for (item=0; item<m_ItemCount; item++)
	{
		const float *pPoint = m_pData + item*nDim;
		float *pMeanRow = m_pMean + m_pClass[item]*nDim;
		for (col=0; col<nDim; col++)
			pMeanRow[col] += pPoint[col];
	}

	// ... then divide by the member count
	for (slot=0; slot<m_nClustersAlive; slot++)
	{
		cls = m_pAliveIndex[slot];
		if (pCount[cls]!=0)	// avoid divide by 0
		{
			float *pMeanRow = m_pMean + cls*nDim;
			for (col=0; col<nDim; col++)
				pMeanRow[col] /= pCount[cls];
		}
	}

	// Covariances: sum the outer products of (point - mean) ...
	for (item=0; item<m_ItemCount; item++)
	{
		cls = m_pClass[item];
		const float *pPoint = m_pData + item*nDim;
		const float *pMeanRow = m_pMean + cls*nDim;
		float *pCovBlock = m_pCov + cls*m_DimCount2;

		for (col=0; col<nDim; col++)
			pDelta[col] = pPoint[col] - pMeanRow[col];

		for (row=0; row<nDim; row++)
			for (col=row; col<nDim; col++)
				pCovBlock[row*nDim + col] += pDelta[row] * pDelta[col];
	}

	// ... then divide by (member count - 1)
	for (slot=0; slot<m_nClustersAlive; slot++)
	{
		cls = m_pAliveIndex[slot];
		if (pCount[cls]>1)	// avoid divide by 0
		{
			float *pCovBlock = m_pCov + cls*m_DimCount2;
			for (row=0; row<nDim; row++)
				for (col=row; col<nDim; col++)
					pCovBlock[row*nDim + col] /= (pCount[cls]-1);
		}
	}

#if 0
	// Diagnostics
	if (Debug) {
			for (c=0; c<nClusters; c++) {
			Output("Class %d - Weight %.2g\n", c, Weight[c]);
			Output("Mean: ");
			MatPrint(stdout, Mean.m_pData + c*m_DimCount, 1, m_DimCount);
//			for(i=0; i<m_DimCount; i++) Output("%.2g ", Mean[c*m_DimCount+i]);
			Output("\nCov:\n");
			MatPrint(stdout, Cov.m_pData + c*m_DimCount2, m_DimCount, m_DimCount);
//			for(i=0; i<m_DimCount; i++) {
//				for(j=0; j<m_DimCount; j++) Output("%.2g ", Cov[c*m_DimCount2+i*m_DimCount+j]);
//				Output("\n");
//			}
			Output("\n");
		}
	}
#endif

	delete [] pCount;
	delete [] pDelta;
}

// Single-precision pi.  Its only appearance in this file is inside a
// commented-out formula in EStep(); the live code uses the double-precision
// PI constant defined near the top of the file.
// NOTE(review): the name matches the M_PI macro some <math.h> implementations
// provide - confirm this cannot clash on the target toolchain.
const float M_PI= 3.14159265358979323846f;

// E-step.  Calculate Log Probs for each point to belong to each living class
// will delete a class if covariance matrix is singular
// also counts number of living classes
void CCluster::EStep()
{
	int p, c,cc, i;
	int nSkipped;
//	float RootDet; // square root of covariance determinant
	float LogRootDet; // log of square root of covariance determinant
	float Mahal; // Mahalanobis distance of point from cluster center
	float *pChol=new float[m_DimCount2];
	float *pVec2Mean=new float[m_DimCount];	 // stores data point minus class mean
	float *pRoot=new float[m_DimCount];	// stores result of Chol*Root = Vec

    float *OptPtrLogP;		// pointer for setting LogP more efficiently
    int *OptPtrClass = m_pClass;
    int *OptPtrOldClass = m_pOldClass;

//	m_nClustersAlive = 0;
	nSkipped = 0;
	
	// start with cluster 0 - uniform distribution over space 
	// because we have normalized all dims to 0...1, density will be 1.
	for (p=0; p<m_ItemCount; p++) 
		m_pLogP[p*s_MaxPossibleClusters + 0] = (float)-log(m_pWeight[0]);
	
	for (cc=1; cc<m_nClustersAlive; cc++)
	{
		c=m_pAliveIndex[cc];

		// calculate cholesky decomposition for class c
		if (Cholesky(m_pCov+c*m_DimCount2, pChol, m_DimCount)) {
			// If Cholesky returns 1, it means the matrix is not positive definite.
			// So kill the class.
			Output("Deleting class %d: covariance matrix is	singular\n", c);
			m_pbClassAlive[c] = false;
//			for (p=0; p<m_ItemCount; p++) 
//				m_pLogP[p*m_MaxClusters + c] = FLT_MAX;
			continue;
		}			

		// Class is alive - so count it and continue
//		m_nClustersAlive++;
			
		// RootDet is given by log of product of diagonal elements
		LogRootDet = 0;
		for(i=0; i<m_DimCount; i++) 
			LogRootDet += log(pChol[i*m_DimCount + i]);

		for (p=0; p<m_ItemCount; p++) 
		{
		    // optimize for speed ...
		    OptPtrLogP = m_pLogP + (p*s_MaxPossibleClusters);


			// to save time -- only recalculate if the last one was close
			if (
				!m_bFullStep 
                && OptPtrClass[p] == OptPtrOldClass[p]
				&& OptPtrLogP[c] - OptPtrLogP[OptPtrClass[p]] > s_DistThresh

//				&& m_pLogP[p*m_MaxClusters+c] - m_pLogP[p*m_MaxClusters+m_pClass[p]] > m_DistThresh 
//				&& m_pClass[p] == m_pOldClass[p]
			) 
			{
				nSkipped++;
				continue;
			}
			
			// Compute Mahalanobis distance
			Mahal = 0;
			
			// calculate data minus class mean
			for(i=0; i<m_DimCount; i++) 
				pVec2Mean[i]=m_pData[p*m_DimCount+i]-m_pMean[c*m_DimCount+i];
			
			// calculate Root vector - by Chol*Root = Vec2Mean
			TriSolve(pChol, pVec2Mean, pRoot, m_DimCount);
		
			// add half of Root vector squared to log p
			for(i=0; i<m_DimCount; i++) 
				Mahal += pRoot[i]*pRoot[i];
			

			// Score is given by Mahal/2 + log RootDet - log weight
			OptPtrLogP[c] = Mahal/2
					+ LogRootDet
					- log(m_pWeight[c]) 
					+ log(2*PI)*m_DimCount/2;			
			
/*
			m_pLogP[p*m_MaxClusters + c] = Mahal/2 
									+ LogRootDet //+ log(RootDet) 
									- log(m_pWeight[c]) 
									+ log(2*M_PI)*m_DimCount/2;
*/
			
#if 0
			if (Debug) {
				if (p==0) {
					Output("Cholesky\n");
					MatPrint(stdout, pChol, m_DimCount, m_DimCount);
					Output("root vector:\n");
					MatPrint(stdout, pRoot, 1, m_DimCount);
					Output("Score = %.3g + %.3g - %.3g = %.3g\n", Mahal/2, LogRootDet
					/*log(RootDet)*/ , log(Weight[c]), LogP[p*MaxClusters + c]);		
				}
			}
#endif
		}	// for (p=0; p<m_ItemCount
	}	// for (cc=1; cc<nClustersAlive
	//Output("Skipped %d ", nSkipped);
	delete [] pChol;
	delete [] pVec2Mean;
	delete [] pRoot;

}

// Choose best class for each point (and second best) out of those living
void CCluster::CStep()
{
	int p, c, cc, TopClass, SecondClass;
	float ThisScore, BestScore, SecondScore;
	
	for (p=0; p<m_ItemCount; p++) 
	{
		m_pOldClass[p] = m_pClass[p];
		BestScore = FLT_MAX;
		SecondScore = FLT_MAX;
		TopClass = SecondClass = 0;
		for (cc=0; cc<m_nClustersAlive; cc++) 
		{
			c=m_pAliveIndex[cc];
			ThisScore = m_pLogP[p*s_MaxPossibleClusters + c];
			if (ThisScore < BestScore) 
			{
				SecondClass = TopClass;
				TopClass = c;
				SecondScore = BestScore;
				BestScore = ThisScore;
			}
			else if (ThisScore < SecondScore) 
			{
				SecondClass = c;
				SecondScore = ThisScore;
			}
		}
		m_pClass[p] = TopClass;
		m_pClass2[p] = SecondClass;

//Output("%.2g %.2g   ", LogP[p*MaxClusters], LogP[p*MaxClusters +1]);
	}

}

// Sometimes deleting a cluster will improve the score, when you take into
// account the complexity penalty.  This function sees if this is the case.
// It will not delete more than one cluster at a time, and never the noise
// class 0.
//
// The cost of deleting a class is the total score increase from moving each
// of its points to that point's second-best class (m_pClass2); the gain is
// the drop in Penalty() from having one class fewer.  Requires CStep() to
// have filled m_pClass and m_pClass2.
void CCluster::ConsiderDeletion()
{
	int c, p;
	int CandidateClass = -1;	// -1 until a deletable class has been found
	float Loss, DeltaPen;
	float *pDeletionLoss=new float[s_MaxPossibleClusters];
	
	for(c=0; c<s_MaxPossibleClusters; c++) 
	{
		if (m_pbClassAlive[c]) 
			pDeletionLoss[c] = 0;
		else 
			pDeletionLoss[c] = FLT_MAX; // classes that are already dead can't be chosen
	}
	
	// compute losses by deleting clusters
	for(p=0; p<m_ItemCount; p++) 
	{
		pDeletionLoss[m_pClass[p]] += m_pLogP[p*s_MaxPossibleClusters + m_pClass2[p]] - m_pLogP[p*s_MaxPossibleClusters + m_pClass[p]];
	}	
	
	// find class with least to lose
	Loss = FLT_MAX;
	for(c=1; c<s_MaxPossibleClusters; c++) 
	{
		if (pDeletionLoss[c]<Loss) 
		{
			Loss = pDeletionLoss[c];
			CandidateClass = c;
		}
	}
	
	// what is the change in penalty?
	DeltaPen = Penalty(m_nClustersAlive) - Penalty(m_nClustersAlive-1);
	
	//Output("cand Class %d would lose %f gain is %f\n", CandidateClass, Loss, DeltaPen);
	// is it worth it?  (CandidateClass stays -1 when no class was eligible,
	// in which case it must not be used as an index.)
	if (CandidateClass>0 && Loss<DeltaPen) 
	{
		Output("Deleting Class %d. Lose %f but Gain %f\n", CandidateClass, Loss, DeltaPen);

		// set it to dead
		m_pbClassAlive[CandidateClass] = false;
		
		// re-allocate all of its points
		for(p=0;p<m_ItemCount; p++) 
			if(m_pClass[p]==CandidateClass) 
				m_pClass[p] = m_pClass2[p];
	}
	delete [] pDeletionLoss;
	Reindex();
}



// Cholesky Decomposition
// In provides the upper triangle of the input matrix (In[i*D + j] is read
// only for j>=i), which is the top half of a symmetric matrix.
// Out receives the lower-triangular factor (Out[i*D + j] is non-zero only
// for j<=i), such that Out * Out' = In.  Out must hold D*D floats.
// D is number of dimensions
//
// returns 0 if OK, returns 1 if matrix is not positive definite.
// A NaN pivot also counts as a failure, so a covariance matrix containing
// NaN is rejected instead of yielding a NaN factor.
int CCluster::Cholesky(float *In, float *Out, int D) 
{
	int i, j, k;
	float sum;
	
	// empty output array
	for (i=0; i<D*D; i++) Out[i] = 0;
	
	// main bit: column i of the factor is completed before column i+1
	for (i=0; i<D; i++) {
		for (j=i; j<D; j++) {	// j>=i
			sum = In[i*D + j];

			for (k=i-1; k>=0; k--) sum -= Out[i*D + k] * Out[j*D + k]; // i,j >= k
			if (i==j) {
				// !(sum > 0) rather than (sum <= 0): the latter is false
				// for NaN and would let a NaN pivot through as "success"
				if (!(sum > 0)) 
					return(1); // Cholesky decomposition has failed
				Out[i*D + i] = sqrt(sum);
			}
			else {
				Out[j*D + i] = sum/Out[i*D + i];
			}
		}
	}
	
	return 0; // for success
}

// Forward substitution: solve M*Out = x for Out.
// M is lower triangular; only entries M[i*D + j] with j<=i are read.
// D is number of dimensions; x and Out each hold D floats.
void CCluster::TriSolve(float *M, float *x, float *Out, int D) 
{
	for (int row=0; row<D; row++)
	{
		const float *pRow = M + row*D;
		float Residual = x[row];

		// remove the contribution of the unknowns already solved
		for (int col=row-1; col>=0; col--)
			Residual -= pRow[col] * Out[col];

		Out[row] = Residual / pRow[row];
	}
}

// SaveOutput() is compiled out (#if 0).  It relabels the non-empty classes of
// m_pBestClass consecutively from 1 and writes the class count followed by
// one label per item to "output.txt".  DoPartition() performs the same
// relabelling inline and returns the labels through pClassID instead.
// NOTE(review): this disabled code sizes its arrays with m_MaxClusters, a
// name the live code in this file does not use (it uses
// s_MaxPossibleClusters) - it would need updating before being re-enabled.
#if 0
void CCluster::SaveOutput()
{
	int p, c;
	int MaxClass = 0;
	int *pNotEmpty;
	pNotEmpty=new int[m_MaxClusters];
	int *pNewLabel;
	pNewLabel=new int[m_MaxClusters];
	
	// find non-empty clusters
	for(c=0;c<m_MaxClusters;c++) 
		pNewLabel[c] = pNotEmpty[c] = 0;
	for(p=0; p<m_ItemCount; p++) 
		pNotEmpty[m_pBestClass[p]] = 1;
	
	// make new cluster labels so we don't have empty ones
	MaxClass = 0;
	for(c=0;c<m_MaxClusters;c++) 
	{
		if (pNotEmpty[c]) 
		{
			MaxClass++;
			pNewLabel[c] = MaxClass;
		}
	}
	
	// print file
	TRY
	{
		CStdioFile F("output.txt",CFile::modeCreate|CFile::modeWrite|CFile::typeText);
		CString str;
		str.Format("%d\n", MaxClass);	// number of classes found
		F.WriteString(str);
		for (p=0; p<m_ItemCount; p++)
		{
			str.Format("%d\n", pNewLabel[m_pBestClass[p]]);	// which class item p belongs to 
			F.WriteString(str);
		}

		F.Close();
	}
	CATCH( CFileException, e )
	{
	   #ifdef _DEBUG
		  afxDump << "File could not be opened "
				  << e->m_cause << "\n";
	   #endif
	}
	END_CATCH
	
	delete [] pNotEmpty;
	delete [] pNewLabel;
}
#endif

// Release every working array and reset its pointer to NULL, so the function
// can be called repeatedly and before any allocation has happened.
void CCluster::DelWorkingArrays()
{
	// delete [] on a NULL pointer is a no-op, so no explicit checks are needed

	// float arrays
	delete [] m_pData;			m_pData = NULL;
	delete [] m_pWeight;		m_pWeight = NULL;
	delete [] m_pMean;			m_pMean = NULL;
	delete [] m_pCov;			m_pCov = NULL;
	delete [] m_pLogP;			m_pLogP = NULL;

	// int arrays
	delete [] m_pClass;			m_pClass = NULL;
	delete [] m_pOldClass;		m_pOldClass = NULL;
	delete [] m_pClass2;		m_pClass2 = NULL;
	delete [] m_pBestClass;		m_pBestClass = NULL;
	delete [] m_pAliveIndex;	m_pAliveIndex = NULL;

	// bool array
	delete [] m_pbClassAlive;	m_pbClassAlive = NULL;
}

// Allocate every working array for the current m_DimCount / m_ItemCount and
// reset the derived state (m_DimCount2, m_NoisePoint, m_bFullStep).
// Per-class arrays are sized for s_MaxPossibleClusters classes.  The arrays
// are left uninitialised, and existing arrays are not freed here - callers
// must have called DelWorkingArrays() (or be freshly constructed).
void CCluster::AllocateArrays()
{
	const int nDim = m_DimCount;
	const int nItems = m_ItemCount;
	const int nSlots = s_MaxPossibleClusters;

	m_DimCount2 = nDim*nDim;
	m_NoisePoint = 1;
	m_bFullStep = true;

	// per-item data and per-class model parameters
	m_pData = new float[nDim*nItems];
	m_pWeight = new float[nSlots];
	m_pMean = new float[nSlots*nDim];
	m_pCov = new float[nSlots*m_DimCount2];

	// item-by-class score table
	m_pLogP = new float[nSlots*nItems];

	// per-item class assignments
	m_pClass = new int[nItems];
	m_pOldClass = new int[nItems];
	m_pClass2 = new int[nItems];
	m_pBestClass = new int[nItems];

	// per-class bookkeeping
	m_pbClassAlive = new bool[nSlots];
	m_pAliveIndex = new int[nSlots];
}


// DoPartition() - cluster nItems points of nDims dimensions.
//
// ppDat       : the data as nDims arrays, each nItems long (ppDat[dim][item]).
//               The data is copied; the caller's arrays are not modified.
// nItems      : number of points.
// nDims       : number of dimensions.
// pClassID    : output, nItems entries.  Receives the class label of each
//               point; labels are consecutive, start at 1 and skip empty
//               classes.
// LogFileName : if non-empty, progress is appended to this text file.
// BestScore   : output, score of the best classification found (lower is
//               better); stays FLT_MAX if no run produced a better score.
//
// Each dimension is rescaled to 0..1, then CEM() is run s_NumStarts times
// for every starting cluster count from s_MinClusters to
// min(s_MaxClusters, s_MaxPossibleClusters); the lowest-scoring assignment
// wins.  Returns the number of distinct labels written to pClassID.
// The working arrays are freed again before returning.
int CCluster::DoPartition(float **ppDat, int nItems, int nDims, int *pClassID,CString LogFileName, float& BestScore)
{
	if (!LogFileName.IsEmpty())	// open log file
	{
		if (!m_LogFile.Open(LogFileName,CFile::modeCreate|CFile::modeNoTruncate|CFile::modeWrite|CFile::typeText)) 
			AfxMessageBox(_T("Couldn't open log file"));
	}


	Output("\n\nSTARTING A PARTITION RUN\n\n");

	ASSERT(pClassID!=NULL);

	// named lo/hi rather than min/max so they cannot be confused with the
	// min()/max() macros
	float lo, hi, range;
	int i,j,p,c;

	m_DimCount=nDims;
	m_ItemCount=nItems;

	DelWorkingArrays();	
	AllocateArrays();

	// Start from a defined "best" assignment (everything in class 0) so the
	// relabelling below never reads uninitialised memory if no CEM run
	// manages to beat the initial BestScore.
	for (p=0; p<m_ItemCount; p++)
		m_pBestClass[p] = 0;

// make local copy, so as not to change original data when normalise
// local data is linear (item-major: all dimensions of item 0, then item 1, ...)
	p=0;
	for (i=0; i<m_ItemCount; i++)
		for (j=0; j<m_DimCount; j++)
			m_pData[p++]=ppDat[j][i];

#if 0
	TRACE("in CCluster::DoPartition\n");
	for (i=0; i<10; i++)
		TRACE("%d: %f\n",i,m_pData[i]);
#endif


// normalize data: rescale every dimension to the range 0..1
	for(i=0; i<m_DimCount; i++) 
	{
		//calculate min and max
		lo = FLT_MAX; 
		hi = -FLT_MAX;
		for(p=0; p<m_ItemCount; p++) 
		{
			float val = m_pData[p*m_DimCount + i];
			if (val > hi) hi = val;
			if (val < lo) lo = val;
		}
		range = hi-lo;
		
		// now normalize.  A dimension in which every value is identical has
		// zero range; map it to 0 instead of dividing by zero (which would
		// fill the data with NaN).
		for(p=0; p<m_ItemCount; p++) 
		{
			if (range > 0)
				m_pData[p*m_DimCount+i] = (m_pData[p*m_DimCount+i] - lo) / range;
			else
				m_pData[p*m_DimCount+i] = 0;
		}
	}

#if 0
	TRACE("in CCluster::DoPartition, after normalise\n");
	for (i=0; i<10; i++)
		TRACE("%d: %f\n",i,m_pData[i]);
#endif

	double Score;
	BestScore = FLT_MAX;

	// never start with more clusters than the arrays can hold
	int maxStartingClusters = (s_MaxClusters < s_MaxPossibleClusters) ? s_MaxClusters : s_MaxPossibleClusters;
// do a CEM for all cluster count starting values between min and max clusters
	for(m_nStartingClusters=s_MinClusters; m_nStartingClusters<=maxStartingClusters; m_nStartingClusters++) 
	{
		for (i=0; i<CCluster::s_NumStarts; i++)
		{
			Output("Starting with %d clusters\n", m_nStartingClusters);

			// do CEM iteration
			Score = CEM();
			
			Output("%d->%d Clusters: Score %f, best is %f\n", m_nStartingClusters, m_nClustersAlive, Score, BestScore);
			
			if (Score < BestScore) 
			{
				Output("THE BEST YET!\n");
				// New best classification found
				BestScore = Score;
				for(p=0; p<m_ItemCount; p++) 
					m_pBestClass[p] = m_pClass[p];
			}
			Output("\n");
		}
	}
	
	int MaxClass = 0;
	int *pNotEmpty;
	pNotEmpty=new int[s_MaxPossibleClusters];
	int *pNewLabel;
	pNewLabel=new int[s_MaxPossibleClusters];
	
	// find non-empty clusters
	for(c=0;c<s_MaxPossibleClusters;c++) 
		pNewLabel[c] = pNotEmpty[c] = 0;
	for(p=0; p<m_ItemCount; p++) 
		pNotEmpty[m_pBestClass[p]] = 1;
	
	// make new cluster labels so we don't have empty ones
	MaxClass = 0;
	for(c=0;c<s_MaxPossibleClusters;c++) 
	{
		if (pNotEmpty[c]) 
		{
			MaxClass++;
			pNewLabel[c] = MaxClass;
		}
	}

	for (p=0; p<m_ItemCount; p++)
		pClassID[p]=pNewLabel[m_pBestClass[p]];	// which class item p belongs to 

	DelWorkingArrays();
	delete [] pNotEmpty;
	delete [] pNewLabel;

	if (m_LogFile.m_pStream!=NULL)
		m_LogFile.Close();
	return MaxClass;
}

// Append op to the log file; does nothing when the log file has no open
// stream.
void CCluster::WriteToLog(CString op)
{
	if (m_LogFile.m_pStream==NULL)
		return;

	m_LogFile.WriteString(op);
}


// Pseudo-random integer in the inclusive range min..max, drawn from rand().
// Callers must pass max >= min.
int CCluster::irand(int min, int max)
{
	const int nValues = max - min + 1;	// how many values the range holds
	return min + rand() % nValues;
}



// Output() - printf-style message to the log file and, when DO_TRACE is set,
// to the debugger trace as well.
// fmt and the following arguments are as for printf.  Messages longer than
// the internal buffer (999 characters) are truncated.
void CCluster::Output(char *fmt, ...) 
{
	char buf[1000];
	va_list arg;

	// _vsnprintf does not write a terminating NUL when the formatted text
	// fills or overflows the buffer, so reserve the last byte and terminate
	// explicitly.  buf[0] is cleared first in case formatting fails outright.
	buf[0] = '\0';
	va_start(arg, fmt);
	_vsnprintf(buf, sizeof(buf)-1, fmt, arg);
	va_end(arg);
	buf[sizeof(buf)-1] = '\0';

	CString op(buf);
	WriteToLog(op);
#if DO_TRACE
	// pass the message as data, not as a format string, so a '%' in the
	// already-formatted text cannot be misread as a conversion
	TRACE(_T("%s"), (LPCTSTR)op);
#endif
}
