forked from tingchenlab/CROP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bayesianclustering.h
148 lines (134 loc) · 4.03 KB
/
bayesianclustering.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#ifndef __BAYESIANCLUSTERING_H__
#define __BAYESIANCLUSTERING_H__
#include <gsl/gsl_math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <iomanip>
#include "common.h"
#include "alignment.h"
using namespace std;
//Global Variables
//*******************************
#define MAXSize 20000
/*
//Golden Standard Store
//*******************************
extern int StandardZ[MAXSize];
extern int Standardk;
extern int StandardNN[MAXSize];
extern double Purity,Pu;
extern double VI,V;
extern int EditDistance;
extern double ED;
*/
//Parameters
//*******************************
typedef struct _Parameters{
int N;
int Step;
int k;
double* Distrk;
double* Time;
//extern int* Indicator;
double* Pi;
double* PiPrev;
int* Center;
double* Sigma;
bool* SigmaChange;
double* Likelihood;
double* LikelihoodRatio; //for UpdateLikelihood;
//************************************
double** LikelihoodTemp;
//************************************
char** SeqID; //for Sequence ID
char** seq; //for reading Fasta
char*** EigenSeqs; //for further iteration use
float** X; //Distance Matrix
//double X2[MAXSize][MAXSize];
int* Z; //missing data
int* NN; //NN[i-1]=# of samples in i-th cluster
int* IterNN; //Record # of seqs in a cluster in this round (everyone is a center seq in previous round)
int* TempNN;
int* Abundance; //for further iteration use
double* Centerlikelihood; //square sum of current center
gsl_rng *r;
bool* Weight; //for Birth-Death control;
unsigned long int WeightSum;
//*******************************
//Result Process
//*******************************
int Resultk;
double* ResultPi;
int* ResultCenter;
double* ResultSigma;
double* ResultLikelihood;
//*******************************
//Hyperparameters
//*******************************
//For Pi
double* Gamma;
//For Sigma
double Alpha;
double Beta;
double g;
double h;
double SigmaSum;
double Lower, Upper;
//For New-born Pi
double Gamma1;
//For Birth-rate
double Lamdab;
//For Stationary Time
double Lamda;
//*******************************
//Time Inspection
//*******************************
double TimeZ;
double TimeCenter;
double TimeSigma;
double TimePi;
double TimeBirth;
double TimeDeath;
double TimeLikelihood;
}Parameters;
//Result Storage
//********************************
typedef struct _BayesianClusteringResult{
int ClusterNumber;
char** ClusterCenterID;
char** ClusterCenterSeq;
double* ClusterStandardDeviation;
int* ClusterSize;
int* ClusterIterationSize;
string** ClusterMember;
string** ClusterMemberSeq;
bool** ClusterMemberFlag;
}BayesianClusteringResult;
//Functions
//*******************************
void Initialize(Parameters&);
void InitRandomGenerator(Parameters&);
char** readBlockSeq(const char*, unsigned int*, int, Parameters&);
void LoadDistanceMatrix(const char*, int, Parameters&);
int GenerateInitial(int, Parameters&);
int UpdateZ(Parameters&);
int UpdatePi(Parameters&);
int UpdateCenter(int, bool, Parameters&);
int UpdateSigma(Parameters&);
int UpdateLikelihoodRatio(Parameters&);
int Birth(int,Parameters&);
int Death(int,int,Parameters&);
int ProcessSampling(int, int,Parameters&);
int ResultProcess(Parameters&);
//int CriteriaCalc(int);
double factorial(int,int);
double gaussian(double, double, Parameters&);
void NewMem(int,Parameters&);
void DeleteMem(int,int,Parameters&);
BayesianClusteringResult BayesianClustering(const char*,int, double, double, BayesianClusteringResult&, int, Parameters&);
extern "C"
{
char** readFastASeq(char*, unsigned int*, Parameters&);
}
//*******************************
#endif//__BAYESIANCLUSTERING_H__