public final class RandomSeedGenerator extends Object
Given an input SequenceFile, randomly select k vectors and write them to the output file as a Kluster representing the initial centroid to use.
This implementation uses reservoir sampling as described in http://en.wikipedia.org/wiki/Reservoir_sampling

Modifier and Type | Method and Description |
---|---|
static org.apache.hadoop.fs.Path | buildRandom(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path input, org.apache.hadoop.fs.Path output, int k, DistanceMeasure measure) |
static org.apache.hadoop.fs.Path | buildRandom(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path input, org.apache.hadoop.fs.Path output, int k, DistanceMeasure measure, Long seed) |
public static final String K
public static org.apache.hadoop.fs.Path buildRandom(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path input, org.apache.hadoop.fs.Path output, int k, DistanceMeasure measure) throws IOException
Throws: IOException
public static org.apache.hadoop.fs.Path buildRandom(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path input, org.apache.hadoop.fs.Path output, int k, DistanceMeasure measure, Long seed) throws IOException
Throws: IOException
Copyright © 2008–2017 The Apache Software Foundation. All rights reserved.