00001 #ifndef GraphSegmentation_H
00002 #define GraphSegmentation_H
00003
00004
00005
00006
00007
00008
00009 #include "Som.h"
00010
00011 class GraphSegmentation {
00012 public:
00013
00014
00015 GraphSegmentation(){};
00016
00017
00018 TIntVector Segment( SOM* som, int min_neurons_per_cluster );
00019
00020 Value_Type Davies_Bouldin( Value_Type p, Value_Type q );
00021
00022 Value_Type Davies_Bouldin_Data( Value_Type p, Value_Type q );
00023
00024 int getNumClusters() { return _num_clusters; };
00025
00026 TIntVector Siponen();
00027
00028
00029
00030
00031 private:
00032
00033 void Init( SOM* som );
00034
00036 TMatrix Centroid( );
00037
00038 TMatrix Distance( );
00039
00040 TVector Winner( );
00041
00042 TVector MeanDistance( );
00043
00044 TMatrix AdjacentNode( );
00045
00046 Value_Type Hmin( );
00047
00048 void Label( int i, int& group, TIntVector& vec, bool& changed );
00049
00050
00051 private:
00052 SOM * _som;
00053
00054 TMatrix _adjacentNode;
00055 TMatrix _distance;
00056 TVector _winner;
00057 TVector _meanDistance;
00058 TMatrix _centroid;
00059
00060 Value_Type _Hmin;
00061 Value_Type _delta;
00062 int m;
00063
00064 TIntVector _clusters;
00065 TVector _S;
00066 TMatrix _M;
00067 int _num_clusters;
00068 TIntVector _num_vector_cluster;
00069 TMatrix _cluster_centroid;
00070 TMatrix _R;
00071
00072 };
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157 TIntVector
00158 GraphSegmentation::Siponen()
00159 {
00160 int i, j, k;
00161 int d = _som->getMapcode()->getNumVar();
00162 TMatrix codebook = _som->getMapcode()->getCodebook();
00163
00164 TIntVector s_vector = create_intvector( 1, _num_clusters );
00165 TMatrix max_value = create_matrix( 1, _num_clusters, 1, d );
00166 TMatrix min_value = create_matrix( 1, _num_clusters, 1, d );
00167 TMatrix mean_value = create_matrix( 1, _num_clusters, 1, d );
00168 TVector count = create_vector( 1, _num_clusters );
00169
00170 for (i=1; i <= _num_clusters; i++) {
00171 count[i] = 0;
00172 s_vector[i] = 0;
00173 for (j=1; j <= d; j++) {
00174 max_value[i][j] = -99999;
00175 min_value[i][j] = 99999;
00176 mean_value[i][j] = 0;
00177 }
00178 }
00179
00180 for (i=1; i<= _num_clusters; i++ ) {
00181 for (j=1; j<=m; j++ ) {
00182 if ( _clusters[j] == i ) {
00183 for (k=1; k<=d; k++ ) {
00184 if ( codebook[j][k] > max_value[i][k] )
00185 max_value[i][k] = codebook[j][k];
00186 if ( codebook[j][k] < min_value[i][k] )
00187 min_value[i][k] = codebook[j][k];
00188 mean_value[i][k] += codebook[j][k];
00189 }
00190 count[i]++;
00191 }
00192 }
00193 }
00194
00195 for (i=1; i <= _num_clusters; i++)
00196 for (k=1; k <= d; k++) {
00197 if ( count[i] != 0 )
00198 mean_value[i][k] /= count[i];
00199 else
00200 mean_value[i][k] = 0;
00201 }
00202
00203 TMatrix s_v = create_matrix( 1, _num_clusters, 1, d );
00204 for (i=1; i <= _num_clusters; i++)
00205 for (k=1; k <= d; k++) {
00206 if ( (( max_value[i][k] - min_value[i][k] )) != 0 )
00207 s_v[i][k] = ( mean_value[i][k] - min_value[i][k] )/( max_value[i][k] - min_value[i][k] );
00208 else
00209 s_v[i][k] = 0;
00210 }
00211
00212 Value_Type sum;
00213 TMatrix s_vc = create_matrix( 1, _num_clusters, 1, d );
00214 for (i=1; i <= _num_clusters; i++)
00215 for (k=1; k <= d; k++) {
00216 sum = 0;
00217 for (j=1; j<= _num_clusters; j++ )
00218 if ( j != i )
00219 sum += s_v[j][k];
00220 if ( sum != 0 )
00221 s_vc[i][k] = s_v[i][k] / ( ( 1.0/((Value_Type)_num_clusters - 1.0) )*sum );
00222 else
00223 s_vc[i][k] = 0;
00224 }
00225
00226
00227
00228
00229
00230 Value_Type s_max_value;
00231
00232 for (i=1; i<= _num_clusters; i++ ) {
00233 s_max_value = -1.0;
00234 for (k=1; k <= d; k++) {
00235 if (s_vc[i][k] > s_max_value ) {
00236 s_vector[i] = k;
00237 s_max_value = s_vc[i][k];
00238 }
00239 }
00240 }
00241
00242 return s_vector;
00243 };
00244
00245 Value_Type
00246 GraphSegmentation::Davies_Bouldin_Data( Value_Type p, Value_Type q )
00247 {
00248 int ii, i, j, k, l;
00249 int d = _som->getMapcode()->getNumVar();
00250 TMatrix entries = _som->getData()->getEntries();
00251
00252 TVector _S = create_vector( 1, _num_clusters );
00253 TMatrix _cluster_centroid = create_matrix(1, _num_clusters, 1, d );
00254 TIntVector _num_vector_cluster = create_intvector( 1, _num_clusters );
00255 TMatrix _M = create_matrix( 1, _num_clusters,1, _num_clusters );
00256 TMatrix _R = create_matrix( 1, _num_clusters,1, _num_clusters );
00257
00258 TvecLabel entries_label = _som->getData()->getId();
00259 TLabel codebook_label = _som->getMapcode()->getLabel();
00260 int n = _som->getData()->getDataSize();
00261
00262 for (i=1; i<= _num_clusters; i++ ) {
00263 _S[i] = 0.0;
00264 _num_vector_cluster[i] = 0;
00265 for (j=1; j <= _num_clusters; j++ ) _R[i][j] = _M[i][j] = 0.0;
00266 for (j=1; j<= d; j++ ) _cluster_centroid[i][j] = 0.0;
00267 }
00268
00269
00270 TVector count = create_vector( 1, _num_clusters ); for (i=1;i<=_num_clusters; i++) count[i] = 0.0;
00271 for (i=1; i<=m; i++ ) {
00272 if ( _clusters[i] > 0 ) {
00273 for (l=0; l< codebook_label[i-1].size(); l++ )
00274 for (j=1; j<=n; j++) {
00275 if ( codebook_label[i-1][l] == entries_label[j-1] ) {
00276 for (k=1; k<=d; k++ ) {
00277 _cluster_centroid[_clusters[i] ][k] += entries[j][k];
00278 }
00279 ++count[ _clusters[i] ];
00280 break;
00281 }
00282 }
00283 }
00284 }
00285
00286 for (i=1; i<= _num_clusters; i++) for (k=1; k<=d; k++ ) _cluster_centroid[i][k] /= count[i];
00287
00288
00289 Value_Type sum;
00290 for (i=1; i<= _num_clusters; i++)
00291 for (j=1; j<= _num_clusters; j++) {
00292 sum = 0.0;
00293 for (k=1; k<= d; k++) sum += pow( fabs(_cluster_centroid[i][k] - _cluster_centroid[j][k]), p );
00294 _M[i][j] = pow( sum, (1.0/p) );
00295 }
00296
00297
00298 for (i=1; i<=_num_clusters; i++) count[i] = 0.0;
00299
00300 for (ii=1; ii<= _num_clusters; ii++)
00301 for (i=1; i<=m; i++ ) {
00302 if ( _clusters[i] == ii ) {
00303 for (l=0; l< codebook_label[i-1].size(); l++ )
00304 for (j=1; j<=n; j++) {
00305 if ( codebook_label[i-1][l] == entries_label[j-1] ) {
00306 sum = 0.0;
00307 for (k=1; k<=d; k++ ) {
00308 sum += pow( fabs( entries[j][k] - _cluster_centroid[ii][k]), q );
00309 }
00310 _S[ii] += sum;
00311 count[ii]++;
00312 break;
00313 }
00314 }
00315 }
00316 }
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 for (i=1; i<= _num_clusters; i++) _S[i] = pow( (_S[i]/count[i]) ,(1.0/q) );
00329
00330
00331
00332
00333
00334
00335
00336 for (i=1; i<= _num_clusters; i++)
00337 for (j=1; j<= _num_clusters; j++)
00338 if ( i!=j )
00339 _R[i][j] = ( _S[i] + _S[j] )/ _M[i][j];
00340 else
00341 _R[i][j] = 0;
00342
00343
00344
00345 Value_Type result = 0.0, max;
00346 for (i=1; i<= _num_clusters; i++) {
00347 max = 0.0;
00348 for (j=1; j<= _num_clusters; j++) {
00349 if (_R[i][j] > max)
00350 max = _R[i][j];
00351 }
00352 result += max;
00353 }
00354
00355 result /= (Value_Type)_num_clusters;
00356
00357 return result;
00358 };
00359
00360 Value_Type
00361 GraphSegmentation::Davies_Bouldin( Value_Type p, Value_Type q )
00362 {
00363 int i, j, k;
00364 int d = _som->getMapcode()->getNumVar();
00365 TMatrix codebook = _som->getMapcode()->getCodebook();
00366
00367
00368 TVector _S = create_vector( 1, _num_clusters );
00369 TMatrix _cluster_centroid = create_matrix(1, _num_clusters, 1, d );
00370 TIntVector _num_vector_cluster = create_intvector( 1, _num_clusters );
00371 TMatrix _M = create_matrix( 1, _num_clusters,1, _num_clusters );
00372 TMatrix _R = create_matrix( 1, _num_clusters,1, _num_clusters );
00373
00374 for (i=1; i<= _num_clusters; i++ ) {
00375 _S[i] = 0.0;
00376 _num_vector_cluster[i] = 0;
00377 for (j=1; j <= _num_clusters; j++ ) _R[i][j] = _M[i][j] = 0.0;
00378 for (j=1; j<= d; j++ ) _cluster_centroid[i][j] = 0.0;
00379 }
00380
00381
00382 TVector count = create_vector( 1, _num_clusters ); for (i=1;i<=_num_clusters; i++) count[i] = 0.0;
00383 for (i=1; i<=m; i++)
00384 if ( _clusters[i] > 0 )
00385 {
00386 for (k=1;k<=d;k++)
00387 {
00388 _cluster_centroid[_clusters[i] ][k] += codebook[i][k];
00389 }
00390 ++count[ _clusters[i] ];
00391 }
00392 for (i=1; i<= _num_clusters; i++) for (k=1; k<=d; k++ ) _cluster_centroid[i][k] /= count[i];
00393
00394
00395 Value_Type sum;
00396 for (i=1; i<= _num_clusters; i++)
00397 for (j=1; j<= _num_clusters; j++) {
00398 sum = 0.0;
00399 for (k=1; k<= d; k++) sum += pow( fabs(_cluster_centroid[i][k] - _cluster_centroid[j][k]), p );
00400 _M[i][j] = pow( sum, (1.0/p) );
00401 }
00402
00403
00404
00405 for (i=1; i<=_num_clusters; i++) count[i] = 0.0;
00406
00407 for (i=1; i<= _num_clusters; i++)
00408 for (j=1; j<=m; j++ ) {
00409 if ( _clusters[j] == i ) {
00410 count[i]++;
00411 sum = 0.0;
00412 for (k=1; k<=d; k++) sum += pow( fabs( codebook[j][k] - _cluster_centroid[i][k]), q );
00413 _S[i] += sum;
00414 }
00415 }
00416 for (i=1; i<= _num_clusters; i++) _S[i] = pow( (_S[i]/count[i]) ,(1.0/q) );
00417
00418
00419
00420
00421
00422
00423
00424 for (i=1; i<= _num_clusters; i++)
00425 for (j=1; j<= _num_clusters; j++)
00426 if ( i!=j )
00427 _R[i][j] = ( _S[i] + _S[j] )/ _M[i][j];
00428 else
00429 _R[i][j] = 0;
00430
00431
00432
00433 Value_Type result = 0.0, max;
00434 for (i=1; i<= _num_clusters; i++) {
00435 max = 0.0;
00436 for (j=1; j<= _num_clusters; j++) {
00437 if (_R[i][j] > max)
00438 max = _R[i][j];
00439 }
00440 result += max;
00441 }
00442
00443 result /= (Value_Type)_num_clusters;
00444
00445 return result;
00446 };
00447
00448
00449 void
00450 GraphSegmentation::Init( SOM* som )
00451 {
00452 _som = som;
00453 _adjacentNode = AdjacentNode();
00454 _distance = Distance( );
00455 _winner = Winner( );
00456 _meanDistance = MeanDistance( );
00457 _centroid = Centroid( );
00458 _Hmin = Hmin( );
00459 m = _som->getMapcode()->getMapSize();
00460 _delta = 0.5;
00461
00462 }
00463
00464 void
00465 GraphSegmentation::Label( int i, int& group, TIntVector& vec, bool & changed ) {
00466 int j;
00467
00468 for (j=1; j<=m; j++)
00469 if ((_adjacentNode[i][j] == 1) && (vec[j] == 0)) {
00470 changed = true;
00471 _adjacentNode[i][j] = 0;
00472 vec[j] = group;
00473 if ( i != j )
00474 Label( j, group, vec, changed );
00475 }
00476 };
00477
00478 TIntVector
00479 GraphSegmentation::Segment( SOM* som, int min_neurons_per_cluster ) {
00480
00481 Init( som );
00482 int i, j;
00483
00484 for (i=1; i<=m; i++)
00485 for (j=1; j<=m; j++)
00486 if ( _adjacentNode[i][j] == 1 ) {
00487 if (( _distance[i][j] > 2*_meanDistance[i]) ||
00488 ( _distance[i][j] > 2*_meanDistance[j]) )
00489 _adjacentNode[i][j] = 0;
00490 if ((_winner[i] == 0) ||
00491 (_winner[j] == 0) ||
00492 (( _winner[i] < _Hmin ) && (_winner[j] < _Hmin )) )
00493 _adjacentNode[i][j] = 0;
00494 if ( _centroid[i][j] > 2*_distance[i][j] )
00495 _adjacentNode[i][j] = 0;
00496 }
00497
00498 TIntVector tmp = create_intvector( 1, m ); for (i=1;i<=m;i++) tmp[i] = 0;
00499 int group = 1;
00500 bool changed = false;
00501
00502 for ( i=1; i<= m; i++ ) {
00503 Label( i, group, tmp, changed );
00504 if ( changed ) {
00505 group++;
00506 changed = false;
00507 }
00508 };
00509
00510 int MaxNumCluster = 0;
00511 for (i=1; i<=m; i++) if (tmp[i] > MaxNumCluster) MaxNumCluster = i;
00512
00513 TIntVector tmp_clusters = create_intvector( 0, MaxNumCluster ); for (i=0;i<=MaxNumCluster;i++) tmp_clusters[i] = 0;
00514 for (i=1; i<=m; i++) tmp_clusters[tmp[i]]++;
00515 for (i=1; i<=m; i++) if ( (tmp[i]>0) && ( tmp_clusters[tmp[i]] < min_neurons_per_cluster ) )
00516 {
00517 tmp[i] = 0;
00518 };
00519
00520
00521 vector<int> vec, vec2;
00522 for (i=1;i<=m;i++) vec.push_back( tmp[i] );
00523 sort( vec.begin(), vec.end() );
00524
00525 int tmp_n = vec[0];
00526 for (i=0;i<m;i++) if (vec[i] != tmp_n)
00527 {
00528 tmp_n = vec[i];
00529 vec2.push_back( vec[i] );
00530 }
00531 _num_clusters = vec2.size();
00532 for (i=1;i<=m;i++)
00533 for (j=0; j< _num_clusters; j++ )
00534 if ( tmp[i] == vec2[j] )
00535 tmp[i] = j+1;
00536
00537
00538
00539 free_intvector( tmp_clusters, 0, MaxNumCluster );
00540
00541 _clusters = tmp;
00542 return tmp;
00543 };
00544
00545 TMatrix
00546 GraphSegmentation::Centroid( ) {
00547 int m = _som->getMapcode()->getMapSize();
00548 int n = _som->getData()->getDataSize();
00549 int d = _som->getMapcode()->getNumVar();
00550 int c;
00551 TMatrix entries = _som->getData()->getEntries();
00552 TMatrix codebook = _som->getMapcode()->getCodebook();
00553 int i, j;
00554
00555 TMatrix centroids = create_matrix( 1, m, 1, d );
00556 for (i=1; i<=m; i++) for (j=1; j<=d; j++) centroids[i][j] = 0.0;
00557 TVector count = create_vector( 1, m );
00558 for (j=1; j<=m; j++) count[j] = 0;
00559
00560 Value_Type min_dist;
00561
00562 for (i=1; i<=n; i++) {
00563 min_dist = 9999999;
00564 for (j=1; j<=m; j++)
00565 if ( dist( entries[i], codebook[j], d ) < min_dist ) {
00566 min_dist = dist( entries[i], codebook[j], d );
00567 c = j;
00568 }
00569 for (j=1; j<=d; j++) centroids[c][j] += entries[i][j];
00570 count[c]++;
00571 }
00572
00573 for (i=1; i<=m; i++) for (j=1; j<=d; j++)
00574 if ( count[i] != 0 )
00575 centroids[i][j] /= count[i];
00576
00577 TMatrix dist_centroids = create_matrix( 1, m, 1, m );
00578 for (i=1; i<=m; i++) for (j=1; j<=m; j++) dist_centroids[i][j] = 0.0;
00579
00580 for (i=1; i<=m; i++) for (j=1; j<=m; j++)
00581 dist_centroids[i][j] = dist( centroids[i], centroids[j], d );
00582
00583 return dist_centroids;
00584 };
00585
00586 TMatrix
00587 GraphSegmentation::Distance( ) {
00588 int m = _som->getMapcode()->getMapSize();
00589 int d = _som->getMapcode()->getNumVar();
00590 TMatrix codebook = _som->getMapcode()->getCodebook();
00591 int i, j;
00592
00593 TMatrix tmp = create_matrix( 1, m, 1, m );
00594
00595 for (i=1; i<=m; i++)
00596 for (j=1; j<=m; j++)
00597 tmp[i][j] = dist( codebook[i], codebook[j], d );
00598
00599 return tmp;
00600 };
00601
00602 TVector
00603 GraphSegmentation::Winner( ) {
00604 int m = _som->getMapcode()->getMapSize();
00605 TLabel maplabel = _som->getMapcode()->getLabel();
00606
00607 TVector tmp = create_vector( 1, m );
00608 int count = 1;
00609 for (TLabel::iterator i = maplabel.begin(); i != maplabel.end(); i++ ) {
00610 if ( (*i).empty() )
00611 tmp[count] = 0;
00612 else
00613 tmp[count] = (*i).size();
00614 count++;
00615 }
00616 return tmp;
00617 };
00618
00619 TVector
00620 GraphSegmentation::MeanDistance( ) {
00621 int m = _som->getMapcode()->getMapSize();
00622 int i, j;
00623 int d = _som->getMapcode()->getNumVar();
00624 TMatrix codebook = _som->getMapcode()->getCodebook();
00625
00626 TVector mean_distance = create_vector(1,m );
00627 for (i=1; i<=m; i++) mean_distance[i] = 0;
00628 TVector count = create_vector(1, m );
00629 for (j=1; j<=m; j++) count[j] = 0;
00630
00631 for (i=1; i<= m; i++)
00632 for (j=1; j<= m; j++)
00633 if ( ( _adjacentNode[i][j] == 1 ) && ( i != j ) ) {
00634 mean_distance[i] += dist( codebook[j], codebook[i], d );
00635 count[i]++;
00636 }
00637
00638 for (i=1; i<=m; i++)
00639 mean_distance[i] /= count[i];
00640
00641 return mean_distance;
00642 };
00643
00644 TMatrix
00645 GraphSegmentation::AdjacentNode() {
00646 TMatrix delta = _som->getDelta();
00647 int m = _som->getMapcode()->getMapSize();
00648 int i, j;
00649
00650 TMatrix tmp = create_matrix( 1, m, 1, m );
00651
00652 for (i=1; i<=m; i++)
00653 for (j=1; j<=m; j++)
00654 if ( delta[i][j] <= sqrt(2) )
00655 tmp[i][j] = 1;
00656 else
00657 tmp[i][j] = 0;
00658
00659 return tmp;
00660 };
00661
00662 Value_Type
00663 GraphSegmentation::Hmin( ) {
00664 int m = _som->getMapcode()->getMapSize();
00665 int n = _som->getData()->getDataSize();
00666
00667 return ( ((Value_Type)m/(Value_Type)n)*_delta );
00668 };
00669
00670 #endif