public final class ClusterDumper extends AbstractJob
Modifier and Type | Class and Description |
---|---|
static class | ClusterDumper.OUTPUT_FORMAT |
Modifier and Type | Field and Description |
---|---|
static String | DICTIONARY_OPTION |
static String | DICTIONARY_TYPE_OPTION |
static String | EVALUATE_CLUSTERS |
static String | NUM_WORDS_OPTION |
static String | OUTPUT_FORMAT_OPT |
static String | POINTS_DIR_OPTION |
static String | SAMPLE_POINTS |
static String | SUBSTRING_OPTION |
Fields inherited from class AbstractJob: argMap, inputFile, inputPath, outputFile, outputPath, tempPath
Constructor and Description |
---|
ClusterDumper() |
ClusterDumper(org.apache.hadoop.fs.Path seqFileDir, org.apache.hadoop.fs.Path pointsDir) |
Modifier and Type | Method and Description |
---|---|
Map<Integer,List<WeightedPropertyVectorWritable>> | getClusterIdToPoints() |
long | getMaxPointsPerCluster() |
int | getNumTopFeatures() |
int | getSubString() |
String | getTermDictionary() |
static void | main(String[] args) |
void | printClusters(String[] dictionary) |
static Map<Integer,List<WeightedPropertyVectorWritable>> | readPoints(org.apache.hadoop.fs.Path pointsPathDir, long maxPointsPerCluster, org.apache.hadoop.conf.Configuration conf) |
int | run(String[] args) |
void | setMaxPointsPerCluster(long maxPointsPerCluster) |
void | setNumTopFeatures(int num) |
void | setOutputFormat(ClusterDumper.OUTPUT_FORMAT of) – Convenience function to set the output format during testing. |
void | setSubString(int subString) |
void | setTermDictionary(String termDictionary, String dictionaryType) |
Methods inherited from class AbstractJob: addFlag, addInputOption, addOption, addOption, addOption, addOption, addOutputOption, buildOption, buildOption, getAnalyzerClassFromOption, getCLIOption, getConf, getDimensions, getFloat, getFloat, getGroup, getInputFile, getInputPath, getInt, getInt, getOption, getOption, getOption, getOptions, getOutputFile, getOutputPath, getOutputPath, getTempPath, getTempPath, hasOption, keyFor, maybePut, parseArguments, parseArguments, parseDirectories, prepareJob, prepareJob, prepareJob, prepareJob, setConf, setS3SafeCombinedInputPath, shouldRunNextPhase
public static final String SAMPLE_POINTS
public static final String DICTIONARY_TYPE_OPTION
public static final String DICTIONARY_OPTION
public static final String POINTS_DIR_OPTION
public static final String NUM_WORDS_OPTION
public static final String SUBSTRING_OPTION
public static final String EVALUATE_CLUSTERS
public static final String OUTPUT_FORMAT_OPT
public ClusterDumper(org.apache.hadoop.fs.Path seqFileDir, org.apache.hadoop.fs.Path pointsDir)
public ClusterDumper()
public void setOutputFormat(ClusterDumper.OUTPUT_FORMAT of)
public int getSubString()
public void setSubString(int subString)
public Map<Integer,List<WeightedPropertyVectorWritable>> getClusterIdToPoints()
public String getTermDictionary()
public void setNumTopFeatures(int num)
public int getNumTopFeatures()
public long getMaxPointsPerCluster()
public void setMaxPointsPerCluster(long maxPointsPerCluster)
public static Map<Integer,List<WeightedPropertyVectorWritable>> readPoints(org.apache.hadoop.fs.Path pointsPathDir, long maxPointsPerCluster, org.apache.hadoop.conf.Configuration conf)
Copyright © 2008–2017 The Apache Software Foundation. All rights reserved.