public class SOMLibSparseInputData extends AbstractSOMLibSparseInputData
InputData
based on a SOMLib Input Vector File.
Modifier and Type | Field and Description |
---|---|
private boolean |
containsMissingValues |
protected cern.colt.matrix.DoubleMatrix2D |
data
The actual data.
|
static boolean |
DEFAULT_NORMALISED |
static int |
DEFAULT_NUM_CACHE_BLOCKS |
static int |
DEFAULT_RANDOM_SEED |
static boolean |
DEFAULT_SPARSE |
static java.lang.String |
INPUT_VECTOR_FILE_FORMAT_CORRUPT_MESSAGE |
protected int[] |
nonZeros
Counts how many of the feature values are not zero; stores an int value for each vector in the input data.
|
protected boolean |
sparse |
private int |
ydim |
classInfo, contentType, dataNames, dim, ERROR_MESSAGE_FILE_FORMAT_CORRUPT, featureMatrixCols, featureMatrixRows, isNormalized, meanVector, mqe0, nameCache, numVectors, rand, source, templateVector
inputFileNameSuffix, MISSING_VALUE
Modifier | Constructor and Description |
---|---|
protected |
SOMLibSparseInputData() |
protected |
SOMLibSparseInputData(cern.colt.matrix.DoubleMatrix2D data,
java.lang.String[] dataNames,
boolean norm,
java.util.Random rand,
TemplateVector tv,
SOMLibClassInformation clsInfo)
Constructor intended for subset generation.
|
|
SOMLibSparseInputData(InputDatum[] inputData,
SOMLibClassInformation classInfo)
Constructor intended for generated synthetic data.
|
|
SOMLibSparseInputData(java.lang.String vectorFileName)
Uses default values for sparsity (true), normalisation (true), cache blocks (1) and seed (7). |
|
SOMLibSparseInputData(java.lang.String vectorFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed) |
|
SOMLibSparseInputData(java.lang.String vectorFileName,
java.lang.String templateFileName) |
|
SOMLibSparseInputData(java.lang.String vectorFileName,
java.lang.String templateFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed) |
|
SOMLibSparseInputData(java.lang.String vectorFileName,
java.lang.String templateFileName,
java.lang.String classInfoFileName) |
|
SOMLibSparseInputData(java.lang.String vectorFileName,
java.lang.String templateFileName,
java.lang.String classInfoFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed) |
Modifier and Type | Method and Description |
---|---|
protected void |
addInstance(int index,
java.lang.String label) |
cern.colt.matrix.DoubleMatrix2D |
getDataAsMatrix() |
static long |
getDimensionality(java.lang.String vectorFileName) |
InputDatum |
getInputDatum(int index)
Get an input datum with a specified index.
|
double[] |
getInputVector(int d)
Get the vector for the input datum of the specified index
|
double |
getValue(int x,
int y)
Returns the value of the y-th feature of input vector x.
|
void |
init(boolean sparse,
boolean norm,
long seed) |
protected void |
initDataStructures(boolean sparse) |
private void |
initFromExistingData(cern.colt.matrix.DoubleMatrix2D data,
java.lang.String[] dataNames,
boolean norm,
java.util.Random rand,
TemplateVector tv,
SOMLibClassInformation clsInfo) |
protected void |
initMatrix(boolean sparse) |
static void |
main(java.lang.String[] args)
Method for stand-alone execution, prints useful information about the input data.
|
double |
mqe0(DistanceMetric metric)
Calculates the mean quantisation error of the top-level unit.
|
protected static java.io.BufferedReader |
openFile(java.lang.String vectorFileName) |
static SOMLibSparseInputData |
parse(java.lang.String contents)
Parses the given contents in SOMLib format to a
SOMLibSparseInputData object. The difference to the main constructor SOMLibSparseInputData(String) is that the constructor reads from
a file, while this method already has the contents in the given parameter. |
protected double |
parseDouble(java.lang.String s) |
protected void |
processLine(int index,
java.lang.String[] lineElements)
Process a single line of the input vector file.
|
private void |
readVectorFile(boolean sparse,
java.io.BufferedReader br) |
protected void |
readVectorFile(java.lang.String vectorFileName,
boolean sparse)
Reads the input data from the given file, which has to follow the Input Vector File
specification.
|
void |
setLabel(int index,
java.lang.String name) |
protected void |
setMatrixValue(int row,
int column,
double value) |
void |
setValue(int datumIndex,
int attributeIndex,
double value) |
InputData |
subset(java.lang.String[] names)
Gets a subset of this input data set.
|
classInformation, create, dim, equals, getByNameDistanceSorted, getContentType, getData, getData, getDataIntervals, getDataSource, getDistanceMatrix, getDistances, getFeatureDensities, getFeatureMatrixColumns, getFeatureMatrixRows, getFileNameSuffix, getFormatName, getInputDatum, getInputDatum, getInputDatumIndex, getLabel, getLabels, getMaxValue, getMeanVector, getMeanVector, getMinValue, getNearestDistances, getNearestN, getNearestN, getNearestNUnsorted, getRandomInputDatum, initDistanceMatrix, isNormalizedToUnitLength, numVectors, setClassInfo, setTemplateVector, templateVector, transformValues
public static final java.lang.String INPUT_VECTOR_FILE_FORMAT_CORRUPT_MESSAGE
public static final boolean DEFAULT_NORMALISED
public static final int DEFAULT_NUM_CACHE_BLOCKS
public static final int DEFAULT_RANDOM_SEED
public static final boolean DEFAULT_SPARSE
private boolean containsMissingValues
protected int[] nonZeros
protected boolean sparse
protected cern.colt.matrix.DoubleMatrix2D data
private int ydim
public SOMLibSparseInputData(InputDatum[] inputData, SOMLibClassInformation classInfo)
protected SOMLibSparseInputData(cern.colt.matrix.DoubleMatrix2D data, java.lang.String[] dataNames, boolean norm, java.util.Random rand, TemplateVector tv, SOMLibClassInformation clsInfo)
public SOMLibSparseInputData(java.lang.String vectorFileName)
Uses default values for sparsity (true), normalisation (true), cache blocks (1) and seed (7).
public SOMLibSparseInputData(java.lang.String vectorFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed)
public SOMLibSparseInputData(java.lang.String vectorFileName, java.lang.String templateFileName)
public SOMLibSparseInputData(java.lang.String vectorFileName, java.lang.String templateFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed)
public SOMLibSparseInputData(java.lang.String vectorFileName, java.lang.String templateFileName, java.lang.String classInfoFileName) throws SOMToolboxException
SOMToolboxException
public SOMLibSparseInputData(java.lang.String vectorFileName, java.lang.String templateFileName, java.lang.String classInfoFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed) throws SOMToolboxException
SOMToolboxException
protected SOMLibSparseInputData()
private void initFromExistingData(cern.colt.matrix.DoubleMatrix2D data, java.lang.String[] dataNames, boolean norm, java.util.Random rand, TemplateVector tv, SOMLibClassInformation clsInfo)
public void init(boolean sparse, boolean norm, long seed)
public InputDatum getInputDatum(int index)
InputData
index - the index of the input datum.
public double[] getInputVector(int d)
InputData
public double getValue(int x, int y)
InputData
public void setValue(int datumIndex, int attributeIndex, double value)
public double mqe0(DistanceMetric metric)
InputData
metric - the metric to use for distance calculation.
protected void readVectorFile(java.lang.String vectorFileName, boolean sparse)
AbstractSOMLibSparseInputData.meanVector
and creates the
AbstractSOMLibSparseInputData.nameCache
for faster index search.
vectorFileName - the name of the input vector file.
private void readVectorFile(boolean sparse, java.io.BufferedReader br)
protected void initDataStructures(boolean sparse)
protected void initMatrix(boolean sparse)
protected static java.io.BufferedReader openFile(java.lang.String vectorFileName)
protected void processLine(int index, java.lang.String[] lineElements) throws java.lang.Exception
index - the line index
lineElements - the line elements, split by the delimiters
java.lang.Exception
protected double parseDouble(java.lang.String s)
protected void setMatrixValue(int row, int column, double value)
protected void addInstance(int index, java.lang.String label)
public InputData subset(java.lang.String[] names)
InputData
names - the label names of the desired subset data.
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public static long getDimensionality(java.lang.String vectorFileName)
public void setLabel(int index, java.lang.String name)
public cern.colt.matrix.DoubleMatrix2D getDataAsMatrix()
public static SOMLibSparseInputData parse(java.lang.String contents)
Parses the given contents in SOMLib format to a SOMLibSparseInputData object. The difference to the main constructor SOMLibSparseInputData(String) is that the constructor reads from a file, while this method already has the contents in the given parameter.