at.tuwien.ifs.somtoolbox.clustering
Class WardClustering<E>

java.lang.Object
  extended by at.tuwien.ifs.somtoolbox.clustering.WardClustering<E>
All Implemented Interfaces:
ClusteringAlgorithm<E>, HierarchicalClusteringAlgorithm<E>

public class WardClustering<E>
extends java.lang.Object
implements HierarchicalClusteringAlgorithm<E>

Version:
$Id: WardClustering.java 3932 2010-11-09 16:56:38Z mayer $
Author:
Rudolf Mayer

Nested Class Summary
(package private)  class WardClustering.ClusterThread
           
 
Field Summary
protected  double[] clusterLevelMergeCosts
           
protected  java.util.ArrayList<HierarchicalCluster<E>>[] clusterLevels
           
protected  java.util.List<HierarchicalCluster<E>> clusters
          Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n.
protected  boolean debug
          Turn on some debugging
private  java.util.concurrent.CountDownLatch doneSignal
           
private  java.util.concurrent.ThreadPoolExecutor e
           
protected  ClusterElementFunctions<E> elementDistance
           
private  int numberOfCPUs
           
protected  int targetSize
           
protected  double threshold
           
 
Constructor Summary
WardClustering(ClusterElementFunctions<E> elementDistance)
          Do full clustering tree.
WardClustering(ClusterElementFunctions<E> elementDistance, double threshold)
          Do clustering with a specified threshold to stop building the tree.
WardClustering(ClusterElementFunctions<E> elementDistance, int targetSize)
          Do clustering with a specified target number of clusters to reach.
 
Method Summary
 HierarchicalCluster<E> clusterStep(java.util.List<HierarchicalCluster<E>> clusters)
           
 java.util.List<HierarchicalCluster<E>> doCluster(java.util.List<E> data)
           
 double ess(Cluster<E> cluster)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(java.util.List<HierarchicalCluster<E>> clusters)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(java.util.List<HierarchicalCluster<E>> clusters, int startX, int startY, int endX, int endY)
           
 java.util.List<HierarchicalCluster<E>> getClusters()
           
 java.util.HashMap<java.lang.Integer,java.util.ArrayList<HierarchicalCluster<E>>> getClustersAtLevel()
          Returns the clusters at all levels
 java.util.ArrayList<HierarchicalCluster<E>> getClustersAtLevel(int num)
          Returns the clustering at a certain level, where the level equals the number of clusters
 java.util.ArrayList<HierarchicalCluster<E>> getClustersByRelativeThreshold(double percent)
          Returns the clustering at a certain level indicated by the relative merge cost for that level, compared to the costs of merging all data items
 java.util.ArrayList<HierarchicalCluster<E>> getClustersByThreshold(double threshold)
          Returns the clustering at a certain level indicated by the merge cost for that level
private  Indices2D[] getIndices(java.util.List<HierarchicalCluster<E>> clusters)
          find matrix indices for starting & ending the symmetrical matrix indices
 double getInitialMinESS(java.util.List<HierarchicalCluster<E>> clusters)
           
 prefuse.data.Tree getPrefuseTree()
           
protected  void init(java.util.List<E> data)
           
 void setDebug(boolean debug)
           
 void setNumberOfCPUs(int numberOfCPUs)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

numberOfCPUs

private int numberOfCPUs

doneSignal

private java.util.concurrent.CountDownLatch doneSignal

e

private java.util.concurrent.ThreadPoolExecutor e

clusterLevels

protected java.util.ArrayList<HierarchicalCluster<E>>[] clusterLevels

clusterLevelMergeCosts

protected double[] clusterLevelMergeCosts

elementDistance

protected ClusterElementFunctions<E> elementDistance

threshold

protected double threshold

targetSize

protected int targetSize

clusters

protected java.util.List<HierarchicalCluster<E>> clusters
Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n. Intermediate results, with numbers of clusters l, m < l < n, are stored in #clusterLevels.


debug

protected boolean debug
Turn on some debugging

Constructor Detail

WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance)
Do full clustering tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      double threshold)
Do clustering with a specified threshold to stop building the tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      int targetSize)
Do clustering with a specified target number of clusters to reach.

Method Detail

getPrefuseTree

public prefuse.data.Tree getPrefuseTree()
Specified by:
getPrefuseTree in interface HierarchicalClusteringAlgorithm<E>

doCluster

public java.util.List<HierarchicalCluster<E>> doCluster(java.util.List<E> data)
Specified by:
doCluster in interface ClusteringAlgorithm<E>

init

protected void init(java.util.List<E> data)

getInitialMinESS

public double getInitialMinESS(java.util.List<HierarchicalCluster<E>> clusters)

ess

public double ess(Cluster<E> cluster)

clusterStep

public HierarchicalCluster<E> clusterStep(java.util.List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(java.util.List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(java.util.List<HierarchicalCluster<E>> clusters,
                                                         int startX,
                                                         int startY,
                                                         int endX,
                                                         int endY)

getClustersByThreshold

public java.util.ArrayList<HierarchicalCluster<E>> getClustersByThreshold(double threshold)
                                                                   throws SOMToolboxException
Returns the clustering at a certain level indicated by the merge cost for that level

Throws:
SOMToolboxException

getClustersByRelativeThreshold

public java.util.ArrayList<HierarchicalCluster<E>> getClustersByRelativeThreshold(double percent)
                                                                           throws SOMToolboxException
Returns the clustering at a certain level indicated by the relative merge cost for that level, compared to the costs of merging all data items

Throws:
SOMToolboxException

getClustersAtLevel

public java.util.ArrayList<HierarchicalCluster<E>> getClustersAtLevel(int num)
                                                               throws SOMToolboxException
Returns the clustering at a certain level, where the level equals the number of clusters

Specified by:
getClustersAtLevel in interface HierarchicalClusteringAlgorithm<E>
Throws:
SOMToolboxException

getClustersAtLevel

public java.util.HashMap<java.lang.Integer,java.util.ArrayList<HierarchicalCluster<E>>> getClustersAtLevel()
                                                                                                    throws SOMToolboxException
Description copied from interface: HierarchicalClusteringAlgorithm
Returns the clusters at all levels

Specified by:
getClustersAtLevel in interface HierarchicalClusteringAlgorithm<E>
Throws:
SOMToolboxException

setDebug

public void setDebug(boolean debug)

setNumberOfCPUs

public void setNumberOfCPUs(int numberOfCPUs)

getIndices

private Indices2D[] getIndices(java.util.List<HierarchicalCluster<E>> clusters)
find matrix indices for starting & ending the symmetrical matrix indices


getClusters

public java.util.List<HierarchicalCluster<E>> getClusters()