public class DistributedRowMatrix extends Object implements VectorIterable, org.apache.hadoop.conf.Configurable
// the path must contain an already created SequenceFile!
DistributedRowMatrix m = new DistributedRowMatrix("path/to/vector/sequenceFile", "tmp/path", 10000000, 250000);
m.setConf(new Configuration());
// now if we want to multiply a vector by this matrix, its dimension must equal the row dimension of this
// matrix. If we want to timesSquared() a vector by this matrix, its dimension must equal the column dimension
// of the matrix.
Vector v = new DenseVector(250000);
// now the following operation will be done via a M/R pass via Hadoop.
Vector w = m.timesSquared(v);
Modifier and Type | Class and Description |
---|---|
static class |
DistributedRowMatrix.MatrixEntryWritable |
Modifier and Type | Field and Description |
---|---|
static String |
KEEP_TEMP_FILES |
Constructor and Description |
---|
DistributedRowMatrix(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputTmpPath,
int numRows,
int numCols) |
DistributedRowMatrix(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputTmpPath,
int numRows,
int numCols,
boolean keepTempFiles) |
Modifier and Type | Method and Description |
---|---|
Vector |
columnMeans() |
Vector |
columnMeans(String vectorClass)
Returns the column-wise mean of a DistributedRowMatrix
|
org.apache.hadoop.conf.Configuration |
getConf() |
org.apache.hadoop.fs.Path |
getOutputTempPath() |
org.apache.hadoop.fs.Path |
getRowPath() |
Iterator<MatrixSlice> |
iterateAll() |
Iterator<MatrixSlice> |
iterateNonEmpty() |
Iterator<MatrixSlice> |
iterator() |
int |
numCols() |
int |
numRows() |
int |
numSlices() |
void |
setConf(org.apache.hadoop.conf.Configuration conf) |
void |
setOutputTempPathString(String outPathString) |
DistributedRowMatrix |
times(DistributedRowMatrix other)
This implements the matrix product this.transpose().times(other)
|
DistributedRowMatrix |
times(DistributedRowMatrix other,
org.apache.hadoop.fs.Path outPath)
This implements the matrix product this.transpose().times(other)
|
Vector |
times(Vector v) |
Vector |
timesSquared(Vector v) |
DistributedRowMatrix |
transpose() |
Methods inherited from class Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Iterable: forEach, spliterator
public static final String KEEP_TEMP_FILES
public DistributedRowMatrix(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputTmpPath, int numRows, int numCols)
public DistributedRowMatrix(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputTmpPath, int numRows, int numCols, boolean keepTempFiles)
public org.apache.hadoop.conf.Configuration getConf()
getConf
in interface org.apache.hadoop.conf.Configurable
public void setConf(org.apache.hadoop.conf.Configuration conf)
setConf
in interface org.apache.hadoop.conf.Configurable
public org.apache.hadoop.fs.Path getRowPath()
public org.apache.hadoop.fs.Path getOutputTempPath()
public void setOutputTempPathString(String outPathString)
public Iterator<MatrixSlice> iterateNonEmpty()
iterateNonEmpty
in interface VectorIterable
public Iterator<MatrixSlice> iterateAll()
iterateAll
in interface VectorIterable
public int numSlices()
numSlices
in interface VectorIterable
public int numRows()
numRows
in interface VectorIterable
public int numCols()
numCols
in interface VectorIterable
public DistributedRowMatrix times(DistributedRowMatrix other) throws IOException
other
- a DistributedRowMatrix
IOException
public DistributedRowMatrix times(DistributedRowMatrix other, org.apache.hadoop.fs.Path outPath) throws IOException
other
- a DistributedRowMatrix
outPath
- path to write result to
IOException
public Vector columnMeans() throws IOException
IOException
public Vector columnMeans(String vectorClass) throws IOException
vectorClass
- desired class for the column-wise mean vector e.g.
RandomAccessSparseVector, DenseVector
IOException
public DistributedRowMatrix transpose() throws IOException
IOException
public Vector times(Vector v)
times
in interface VectorIterable
public Vector timesSquared(Vector v)
timesSquared
in interface VectorIterable
public Iterator<MatrixSlice> iterator()
iterator
in interface Iterable<MatrixSlice>
Copyright © 2008–2017 The Apache Software Foundation. All rights reserved.