public class TopicModel extends Object implements org.apache.hadoop.conf.Configurable, Iterable<MatrixSlice>
Matrix
of counts of occurrences of (topic, term) pairs. Dividing
{@code topicTermCount.viewRow(topic).get(term)} by the sum over the values for all terms in that
row yields p(term | topic). Dividing it instead by the sum of that term's column over all topics
yields p(topic | term).
Multithreading is enabled for the update(Matrix)
method: this method is async, and
merely submits the matrix to a work queue. When all work has been submitted,
awaitTermination()
should be called, which will block until updates have been
accumulated.

Constructor and Description |
---|
TopicModel(org.apache.hadoop.conf.Configuration conf,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight,
org.apache.hadoop.fs.Path... modelpath) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
Random random,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
double modelWeight) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
Vector topicSums,
double eta,
double alpha,
String[] dictionary,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
Vector topicSums,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
Modifier and Type | Method and Description |
---|---|
org.apache.hadoop.conf.Configuration |
getConf() |
int |
getNumTerms() |
int |
getNumTopics() |
Vector |
infer(Vector original,
Vector docTopics) |
Iterator<MatrixSlice> |
iterator() |
static Pair<Matrix,Vector> |
loadModel(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path... modelPaths) |
double |
perplexity(Vector document,
Vector docTopics)
\(\sum_x \sum_a \left( c_{ai} \cdot \log\left( p(x \mid i) \cdot p(a \mid x) \right) \right)\)
|
void |
persist(org.apache.hadoop.fs.Path outputDir,
boolean overwrite) |
void |
renormalize() |
void |
reset() |
int |
sampleTerm(int topic) |
int |
sampleTerm(Vector topicDistribution) |
void |
setConf(org.apache.hadoop.conf.Configuration configuration) |
void |
stop() |
Vector |
topicSums() |
String |
toString() |
void |
trainDocTopicModel(Vector original,
Vector topics,
Matrix docTopicModel) |
void |
update(int termId,
Vector topicCounts) |
void |
update(Matrix docTopicCounts) |
void |
updateTopic(int topic,
Vector docTopicCounts) |
static String |
vectorToSortedString(Vector vector,
String[] dictionary) |
Methods inherited from class Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
Methods inherited from interface Iterable: forEach, spliterator
public TopicModel(int numTopics, int numTerms, double eta, double alpha, String[] dictionary, double modelWeight)
public TopicModel(org.apache.hadoop.conf.Configuration conf, double eta, double alpha, String[] dictionary, int numThreads, double modelWeight, org.apache.hadoop.fs.Path... modelpath) throws IOException
IOException
public TopicModel(int numTopics, int numTerms, double eta, double alpha, String[] dictionary, int numThreads, double modelWeight)
public TopicModel(int numTopics, int numTerms, double eta, double alpha, Random random, String[] dictionary, int numThreads, double modelWeight)
public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha, String[] dictionary, double modelWeight)
public TopicModel(Matrix topicTermCounts, double eta, double alpha, String[] dictionary, int numThreads, double modelWeight)
public int getNumTerms()
public int getNumTopics()
public Iterator<MatrixSlice> iterator()
iterator
in interface Iterable<MatrixSlice>
public Vector topicSums()
public static Pair<Matrix,Vector> loadModel(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path... modelPaths) throws IOException
IOException
public int sampleTerm(Vector topicDistribution)
public int sampleTerm(int topic)
public void reset()
public void stop()
public void renormalize()
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel)
public void update(Matrix docTopicCounts)
public void updateTopic(int topic, Vector docTopicCounts)
public void update(int termId, Vector topicCounts)
public void persist(org.apache.hadoop.fs.Path outputDir, boolean overwrite) throws IOException
IOException
public double perplexity(Vector document, Vector docTopics)
public void setConf(org.apache.hadoop.conf.Configuration configuration)
setConf
in interface org.apache.hadoop.conf.Configurable
public org.apache.hadoop.conf.Configuration getConf()
getConf
in interface org.apache.hadoop.conf.Configurable
Copyright © 2008–2017 The Apache Software Foundation. All rights reserved.