org.cdlib.xtf.textEngine.facet
Class FRBRGroupData

Object
  extended by GroupData
      extended by DynamicGroupData
          extended by FRBRGroupData

public class FRBRGroupData
extends DynamicGroupData

Implements a dynamic mapping from document to a FRBR-style title/author key.

Author:
Martin Haye

Field Summary
private  TagChars chars1
           
private  TagChars chars2
           
private static char[] charType
           
private  FRBRData data
          Tag/doc data for the specified fields
private  IntList docGroups
          Mapping of documents to groups
private  IntList docs
          IDs of matching documents
private  FloatList docScores
          Score of each matching document
private  IntList groupDocCounts
          Number of documents in each group
private  IntList groupDocs
          First document in each group (for sorting purposes)
private  FloatList groupScores
          Score of each group
private  IntList matchTags1
           
private  IntList matchTags2
           
private  int maxDoc
          Highest doc ID encountered
private  int nGroups
          Number of groups created so far
private  String params
          Original parameter string
private  int primarySort
          Primary field to sort by
private  boolean reversePrimarySort
          Whether primary sort is in reverse order
private static int WORD_HASH_SIZE
           
private  int[] wordHash
           
private  int wordHashKey
           
 
Constructor Summary
FRBRGroupData()
           
 
Method Summary
 int child(int groupId)
          Get the first child of the given group, or -1 if it has no children
 void collect(int doc, float score)
          Add a document (that matched the query) to our data.
 int compare(int group1, int group2)
          Compare two groups for sort order
private  int compareField(int type, int doc1, int doc2, boolean reverse)
          Compare a particular field of two groups
private  void debugFieldMatch(String field, int doc1, int doc2)
           
private  String docTitle(int doc)
          Find the title of a document
 String field()
          Get the field name (synthetic in our case)
private  void findGroup(int mainDoc)
          Figure out a group to put the document in.
 int findGroup(String name)
          Locate a group by name and return its index, or -1 if not found
 void finish()
          Form the final FRBR groups for the document set.
 int firstLink(int docId)
          Return the ID of the first link for the given document, or -1 if there are no links for that document.
 void init(IndexReader indexReader, String params)
          Read in the FRBR data for a delimited list of fields.
 boolean isDynamic()
          Whether the data is dynamic and thus has counts and scores available
 int linkGroup(int linkId)
          Returns the group number of the specified link
private  boolean matchOnTitle(int mainDoc, int mainTitle, int compTitle)
          Determines if the two titles match enough to warrant further examination, and if so, continues the matching process on documents from the comparable title.
private  boolean matchPartialAuthor(int tag1, int tag2)
          Compare two author names to see if the keywords from one are completely contained within the other.
private  boolean matchPartialId(int tag1, int tag2)
          Check if two identifiers match before parentheses
private  boolean matchPartialTitle(int tag1, int tag2)
          Check if one title matches the other without a colon.
private  boolean multiFieldMatch(int doc1, int doc2)
          Compare the fields of two documents to determine if they should be in the same FRBR group.
 String name(int groupId)
          Get the name of a group given its number
 int nChildren(int groupId)
          Get the number of children a group has
 int nDocHits(int groupId)
          Only called for dynamic data: get count of docs in a group
 int nextLink(int linkId)
          Return the ID of the link after the specified one, or -1 if no more
 int nGroups()
          Get the total number of groups
private  void outputDisplayKey(String title, int doc)
           
 int parent(int groupId)
          Get the parent of the given group, or -1 if group is the root
 float score(int groupId)
          Only called for dynamic data: get score of a group
private  int scoreAuthorMatch(IntList list1, IntList list2)
          Score the potential match of two lists of authors.
private  int scoreDateMatch(IntList list1, IntList list2)
          Compare two dates for a match.
private  int scoreIdMatch(IntList list1, IntList list2)
          Score the potential match of two lists of identifiers.
private  int scoreTitleMatch(IntList list1, IntList list2)
          Score the potential match of two lists of titles.
 int sibling(int groupId)
          Get the sibling of the given group, or -1 if no more
 
Methods inherited from class GroupData
debugGroups
 
Methods inherited from class Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

params

private String params
Original parameter string


data

private FRBRData data
Tag/doc data for the specified fields


docs

private IntList docs
IDs of matching documents


maxDoc

private int maxDoc
Highest doc ID encountered


docScores

private FloatList docScores
Score of each matching document


docGroups

private IntList docGroups
Mapping of documents to groups


groupDocs

private IntList groupDocs
First document in each group (for sorting purposes)


groupDocCounts

private IntList groupDocCounts
Number of documents in each group


groupScores

private FloatList groupScores
Score of each group


nGroups

private int nGroups
Number of groups created so far


primarySort

private int primarySort
Primary field to sort by


reversePrimarySort

private boolean reversePrimarySort
Whether primary sort is in reverse order


matchTags1

private IntList matchTags1

matchTags2

private IntList matchTags2

chars1

private TagChars chars1

chars2

private TagChars chars2

wordHashKey

private int wordHashKey

WORD_HASH_SIZE

private static final int WORD_HASH_SIZE

wordHash

private int[] wordHash

charType

private static final char[] charType
Constructor Detail

FRBRGroupData

public FRBRGroupData()
Method Detail

init

public void init(IndexReader indexReader,
                 String params)
          throws IOException
Read in the FRBR data for a delimited list of fields.

Specified by:
init in class DynamicGroupData
Throws:
IOException

collect

public void collect(int doc,
                    float score)
Add a document (that matched the query) to our data.

Specified by:
collect in class DynamicGroupData
Parameters:
doc - Lucene document identifier for matching document
score - Calculated score for the doc (always greater than zero)

finish

public void finish()
Form the final FRBR groups for the document set.

Specified by:
finish in class DynamicGroupData

findGroup

private void findGroup(int mainDoc)
Figure out a group to put the document in. If it matches other documents, the group will contain all of them; otherwise, it'll be a singleton.

Parameters:
mainDoc - Document to put into a group

matchOnTitle

private boolean matchOnTitle(int mainDoc,
                             int mainTitle,
                             int compTitle)
Determines if the two titles match enough to warrant further examination, and if so, continues the matching process on documents from the comparable title.

Parameters:
mainDoc - main document being matched
mainTitle - main doc's title tag
compTitle - title tag to compare
Returns:
true if title iteration should continue.

multiFieldMatch

private boolean multiFieldMatch(int doc1,
                                int doc2)
Compare the fields of two documents to determine if they should be in the same FRBR group.

Parameters:
doc1 - First document
doc2 - Second document
Returns:
true if they're equivalent

debugFieldMatch

private void debugFieldMatch(String field,
                             int doc1,
                             int doc2)

outputDisplayKey

private void outputDisplayKey(String title,
                              int doc)

scoreTitleMatch

private int scoreTitleMatch(IntList list1,
                            IntList list2)
Score the potential match of two lists of titles.


matchPartialTitle

private boolean matchPartialTitle(int tag1,
                                  int tag2)
Check if one title matches the other without a colon.


scoreAuthorMatch

private int scoreAuthorMatch(IntList list1,
                             IntList list2)
Score the potential match of two lists of authors.


matchPartialAuthor

private boolean matchPartialAuthor(int tag1,
                                   int tag2)
Compare two author names to see if the keywords from one are completely contained within the other.


scoreDateMatch

private int scoreDateMatch(IntList list1,
                           IntList list2)
Compare two dates for a match.


scoreIdMatch

private int scoreIdMatch(IntList list1,
                         IntList list2)
Score the potential match of two lists of identifiers.


matchPartialId

private boolean matchPartialId(int tag1,
                               int tag2)
Check if two identifiers match before parentheses


field

public String field()
Get the field name (synthetic in our case)

Specified by:
field in class GroupData

name

public String name(int groupId)
Description copied from class: GroupData
Get the name of a group given its number

Specified by:
name in class GroupData

findGroup

public int findGroup(String name)
Description copied from class: GroupData
Locate a group by name and return its index, or -1 if not found

Specified by:
findGroup in class GroupData

child

public int child(int groupId)
Description copied from class: GroupData
Get the first child of the given group, or -1 if it has no children

Specified by:
child in class GroupData

sibling

public int sibling(int groupId)
Description copied from class: GroupData
Get the sibling of the given group, or -1 if no more

Specified by:
sibling in class GroupData

parent

public int parent(int groupId)
Description copied from class: GroupData
Get the parent of the given group, or -1 if group is the root

Specified by:
parent in class GroupData

nChildren

public int nChildren(int groupId)
Description copied from class: GroupData
Get the number of children a group has

Specified by:
nChildren in class GroupData

firstLink

public int firstLink(int docId)
Description copied from class: GroupData
Return the ID of the first link for the given document, or -1 if there are no links for that document.

Specified by:
firstLink in class GroupData
Parameters:
docId - document to look for
Returns:
the first link ID, or -1 if none

nextLink

public int nextLink(int linkId)
Description copied from class: GroupData
Return the ID of the link after the specified one, or -1 if no more

Specified by:
nextLink in class GroupData

linkGroup

public int linkGroup(int linkId)
Description copied from class: GroupData
Returns the group number of the specified link

Specified by:
linkGroup in class GroupData

nGroups

public int nGroups()
Description copied from class: GroupData
Get the total number of groups

Specified by:
nGroups in class GroupData

isDynamic

public boolean isDynamic()
Description copied from class: GroupData
Whether the data is dynamic and thus has counts and scores available

Overrides:
isDynamic in class GroupData

nDocHits

public int nDocHits(int groupId)
Description copied from class: GroupData
Only called for dynamic data: get count of docs in a group

Overrides:
nDocHits in class GroupData

score

public float score(int groupId)
Description copied from class: GroupData
Only called for dynamic data: get score of a group

Overrides:
score in class GroupData

compare

public final int compare(int group1,
                         int group2)
Description copied from class: GroupData
Compare two groups for sort order

Specified by:
compare in class GroupData

docTitle

private String docTitle(int doc)
Find the title of a document


compareField

private int compareField(int type,
                         int doc1,
                         int doc2,
                         boolean reverse)
Compare a particular field of two groups