#pragma once

// Clustering engine.  Only declarations are visible in this header; the member
// definitions live elsewhere.  The private step names (E-step, M-step, C-step,
// CEM) suggest a classification-EM style fit -- NOTE(review): confirm against
// the implementation file.
class CCluster
{
public:
	CCluster();
	// Constructs from a pointer to another CCluster.  The original author
	// marked this "copy constructor???"; because it takes a pointer rather
	// than a reference it is not a copy constructor in the C++ sense.
	CCluster(CCluster *pCluster);
	~CCluster();

	// ---- Tunable parameters shared by all instances ----

	// Min and MaxClusters include cluster 1, the noise cluster.
	static int s_MinClusters;
	static int s_MaxClusters;
	// Splitting can't make the cluster count exceed this.
	static int s_MaxPossibleClusters;
	// Number of times to start from each number of clusters.
	static int s_NumStarts;
	static int s_RandomSeed;
	static int s_DistDump;
	// Points with at least this much difference from the best do not get
	// their E-step recalculated -- and that's most of them.
	static float s_DistThresh;
	// ...but there is always a full E-step every this many iterations,
	static int s_FullStepEvery;
	// ...or if at least this fraction of points changed class last time.
	static float s_ChangedThresh;
	// Maximum number of iterations.
	static int s_MaxIter;
	static int s_SplitEvery;
	// Amount of BIC to use for the penalty; must be between 0 and 1.
	static float s_PenaltyMix;

	// Entry point for a clustering run.
	// NOTE(review): presumably ppDat is indexed [item][dim] with nItems x nDims
	// entries, pClassID receives one class id per item and bestScore receives
	// the best score found -- confirm against the definition.
	int DoPartition(float **ppDat, int nItems, int nDims, int *pClassID,
	                CString m_LogFileName, float &bestScore);

#ifdef _DEBUG
	// Debug-build-only member.
	int m_nSplitError;
#endif

private:
	// ---- Logging ----
	int m_Log;
	CStdioFile m_LogFile;
	void WriteToLog(CString str);
	// Write to screen and log file (printf-style format plus varargs).
	void Output(char *fmt, ...);

	// ---- Working-array management ----
	void AllocateArrays();
	void DelWorkingArrays();
	void Reindex();
//	void SaveOutput();

	// ---- Fitting steps ----
	double CEM(bool recurse = true);
	void EStep();
	void MStep();
	void CStep();
	void ConsiderDeletion();
	bool TrySplits();
	double ComputeScore();
	double Penalty(int nClustersAlive);

	// ---- Numeric helpers ----
	int Cholesky(float *m_In, float *m_Out, int D);
	void TriSolve(float *M, float *x, float *Out, int D);
	int irand(int min, int max);

	// ---- Problem dimensions ----
	int m_DimCount;
	int m_ItemCount;

	// ---- Per-run state (labelled "GLOBAL VARIABLES" by the original author) ----

	// nDims squared.
	int m_DimCount2;
	int m_nStartingClusters;
	int m_nClustersAlive;
	int m_PointCount;
	// Indicates that the next E-step should be a full step (no time saving).
	bool m_bFullStep;
	// Number of fake points always in the noise cluster to ensure
	// noise weight > 0.  Default is 1.
	int m_NoisePoint;

	// Data[p*nDims + d] = input data for point p, dimension d.
	float *m_pData;
	// Weight[c] = class weight for class c.
	float *m_pWeight;
	// Mean[c*nDims + d] = cluster mean for cluster c in dimension d.
	float *m_pMean;
	// Cov[c*nDims*nDims + i*nDims + j] = covariance for cluster c, entry i,j.
	// NB covariances are stored in the upper triangle (j >= i).
	float *m_pCov;
	// LogP[p*MaxClusters + c] = minus log likelihood for point p in cluster c.
	float *m_pLogP;

	// Class[p] = best cluster for point p.
	int *m_pClass;
	// NOTE(review): the original comment here duplicated m_pClass's; presumably
	// this holds the previous class assignment for point p -- confirm.
	int *m_pOldClass;
	// Class2[p] = second best cluster for point p.
	int *m_pClass2;
	// Best classification yet achieved.
	int *m_pBestClass;
	// Contains 1 if the class is still alive, otherwise 0.
	bool *m_pbClassAlive;
	// A list of the alive classes to iterate over.
	int *m_pAliveIndex;
};
