at.tuwien.ifs.somtoolbox.clustering
Class WardClustering<E>

java.lang.Object
  extended by at.tuwien.ifs.somtoolbox.clustering.WardClustering<E>
All Implemented Interfaces:
ClusteringAlgorithm<E>, HierarchicalClusteringAlgorithm<E>

public class WardClustering<E>
extends Object
implements HierarchicalClusteringAlgorithm<E>

Version:
$Id: WardClustering.java 3766 2010-08-20 14:23:28Z mayer $
Author:
Rudolf Mayer

Nested Class Summary
(package private)  class WardClustering.ClusterThread
           
 
Field Summary
protected  ArrayList<Cluster<E>>[] clusterLevels
           
protected  List<HierarchicalCluster<E>> clusters
          Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n.
protected  boolean debug
          Turn on some debugging
private  CountDownLatch doneSignal
           
private  ThreadPoolExecutor e
           
protected  ClusterElementFunctions<E> elementDistance
           
private  int numberOfCPUs
           
protected  int targetSize
           
protected  double threshold
           
 
Constructor Summary
WardClustering(ClusterElementFunctions<E> elementDistance)
          Do full clustering tree.
WardClustering(ClusterElementFunctions<E> elementDistance, double threshold)
          Do clustering with a specified threshold to stop building the tree.
WardClustering(ClusterElementFunctions<E> elementDistance, int targetSize)
          Do clustering with a specified target number of clusters to reach.
 
Method Summary
 HierarchicalCluster<E> clusterStep(List<HierarchicalCluster<E>> clusters)
           
 List<HierarchicalCluster<E>> doCluster(List<E> data)
           
 double ess(Cluster<E> cluster)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters, int startX, int startY, int endX, int endY)
           
 List<HierarchicalCluster<E>> getClusters()
           
 ArrayList<Cluster<E>> getClustersAtLevel(int num)
           
 ArrayList<Cluster<E>> getClustersByThreshold(double threshold)
           
private  Indices2D[] getIndices(List<HierarchicalCluster<E>> clusters)
          find matrix indices for starting & ending the symmetrical matrix indices
 double getInitialMinESS(List<HierarchicalCluster<E>> clusters)
           
 prefuse.data.Tree getPrefuseTree()
           
protected  void init(List<E> data)
           
 void setDebug(boolean debug)
           
 void setNumberOfCPUs(int numberOfCPUs)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

numberOfCPUs

private int numberOfCPUs

doneSignal

private CountDownLatch doneSignal

e

private ThreadPoolExecutor e

clusterLevels

protected ArrayList<Cluster<E>>[] clusterLevels

elementDistance

protected ClusterElementFunctions<E> elementDistance

threshold

protected double threshold

targetSize

protected int targetSize

clusters

protected List<HierarchicalCluster<E>> clusters
Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n. Intermediate results, with numbers of clusters l, m < l < n, are stored in #clusterLevels.


debug

protected boolean debug
Turn on some debugging

Constructor Detail

WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance)
Do full clustering tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      double threshold)
Do clustering with a specified threshold to stop building the tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      int targetSize)
Do clustering with a specified target number of clusters to reach.

Method Detail

getPrefuseTree

public prefuse.data.Tree getPrefuseTree()
Specified by:
getPrefuseTree in interface HierarchicalClusteringAlgorithm<E>

doCluster

public List<HierarchicalCluster<E>> doCluster(List<E> data)
Specified by:
doCluster in interface ClusteringAlgorithm<E>

init

protected void init(List<E> data)

getInitialMinESS

public double getInitialMinESS(List<HierarchicalCluster<E>> clusters)

ess

public double ess(Cluster<E> cluster)

clusterStep

public HierarchicalCluster<E> clusterStep(List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters,
                                                         int startX,
                                                         int startY,
                                                         int endX,
                                                         int endY)

getClustersByThreshold

public ArrayList<Cluster<E>> getClustersByThreshold(double threshold)
                                             throws org.apache.commons.lang.NotImplementedException
Throws:
org.apache.commons.lang.NotImplementedException

getClustersAtLevel

public ArrayList<Cluster<E>> getClustersAtLevel(int num)

setDebug

public void setDebug(boolean debug)

setNumberOfCPUs

public void setNumberOfCPUs(int numberOfCPUs)

getIndices

private Indices2D[] getIndices(List<HierarchicalCluster<E>> clusters)
find matrix indices for starting & ending the symmetrical matrix indices


getClusters

public List<HierarchicalCluster<E>> getClusters()