org.knowceans.corpus
Class Document

java.lang.Object
  extended by org.knowceans.corpus.Document

public class Document
extends java.lang.Object

Wrapper for a document in LDA (latent Dirichlet allocation).

lda-c reference: struct document in lda.h. This class distinguishes between terms and words, which changes the API nomenclature relative to lda-c. TODO: automatic length tracking.

Author:
heinrich

Constructor Summary
Document()
           
Document(Document d)
          copy constructor
Document(int length)
           
 
Method Summary
 void addDocument(Document d)
          add all terms to the end of this document, filling the parBounds field.
 void compile()
           
 int getCount(int index)
           
 int[] getCounts()
           
 int getNumTerms()
           
 int getNumWords()
           
 int[] getParBounds()
           
 int getTerm(int index)
           
 int[] getTerms()
           
 void mergeDocument(Document d)
          merge all terms and add the document or null.
 void setCount(int index, int count)
           
 void setCounts(int[] is)
           
 void setNumTerms(int i)
           
 void setNumWords(int i)
           
 void setParBounds(int[] parBounds)
           
 void setTerm(int index, int term)
           
 void setWords(int[] is)
           
 java.lang.String toString()
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Constructor Detail

Document

public Document()

Document

public Document(int length)

Document

public Document(Document d)
copy constructor

Parameters:
d - the document to copy
Method Detail

compile

public void compile()

getCounts

public int[] getCounts()
Returns:

getCount

public int getCount(int index)
Parameters:
index -
Returns:

setCount

public void setCount(int index,
                     int count)
Parameters:
count -
index -

getNumTerms

public int getNumTerms()
Returns:

getNumWords

public int getNumWords()
Returns:

getTerms

public int[] getTerms()
Returns:

getTerm

public int getTerm(int index)
Parameters:
index -
Returns:

setTerm

public void setTerm(int index,
                    int term)
Parameters:
term -
index -

setCounts

public void setCounts(int[] is)
Parameters:
is -

setNumTerms

public void setNumTerms(int i)
Parameters:
i -

setNumWords

public void setNumWords(int i)
Parameters:
i -

setWords

public void setWords(int[] is)
Parameters:
is -

addDocument

public void addDocument(Document d)
Adds all terms of the given document to the end of this document, filling the parBounds field. Paragraphs should be added only this way. If parBounds == null, this document is emptied before the new content is added. Vectors are not copied.

Parameters:
d -

mergeDocument

public void mergeDocument(Document d)
merge all terms and add the document or null.

Parameters:
d -

getParBounds

public int[] getParBounds()

setParBounds

public void setParBounds(int[] parBounds)

toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object