public class FRBRGroupData extends DynamicGroupData
Modifier and Type | Field and Description |
---|---|
private TagChars |
chars1 |
private TagChars |
chars2 |
private static char[] |
charType |
private FRBRData |
data
Tag/doc data for the specified fields
|
private IntList |
docGroups
Mapping of documents to groups
|
private IntList |
docs
IDs of matching documents
|
private FloatList |
docScores
Score of each matching document
|
private IntList |
groupDocCounts
Number of documents in each group
|
private IntList |
groupDocs
First document in each group (for sorting purposes)
|
private FloatList |
groupScores
Score of each group
|
private IntList |
matchTags1 |
private IntList |
matchTags2 |
private int |
maxDoc
Highest doc ID encountered
|
private int |
nGroups
Number of groups created so far
|
private String |
params
Original parameter string
|
private int |
primarySort
Primary field to sort by
|
private boolean |
reversePrimarySort
Whether primary sort is in reverse order
|
private static int |
WORD_HASH_SIZE |
private int[] |
wordHash |
private int |
wordHashKey |
Constructor and Description |
---|
FRBRGroupData() |
Modifier and Type | Method and Description |
---|---|
int |
child(int groupId)
Get the first child of the given group, or -1 if it has no children
|
void |
collect(int doc,
float score)
Add a document (that matched the query) to our data.
|
int |
compare(int group1,
int group2)
Compare two groups for sort order
|
private int |
compareField(int type,
int doc1,
int doc2,
boolean reverse)
Compare a particular field of two groups
|
private void |
debugFieldMatch(String field,
int doc1,
int doc2) |
private String |
docTitle(int doc)
Find the title of a document
|
String |
field()
Get the field name (synthetic in our case)
|
private void |
findGroup(int mainDoc)
Figure out a group to put the document in.
|
int |
findGroup(String name)
Locate a group by name and return its index, or -1 if not found
|
void |
finish()
Form the final FRBR groups for the document set.
|
int |
firstLink(int docId)
Return the ID of the first link for the given document, or -1 if there
are no links for that document.
|
void |
init(IndexReader indexReader,
Set tokFields,
String params)
Read in the FRBR data for a delimited list of fields.
|
boolean |
isDynamic()
Whether the data is dynamic and thus has counts and scores available
|
int |
linkGroup(int linkId)
Returns the group number of the specified link
|
private boolean |
matchOnTitle(int mainDoc,
int mainTitle,
int compTitle)
Determines if the two titles match enough to warrant further examination,
and if so, continues the matching process on documents from the
comparable title.
|
private boolean |
matchPartialAuthor(int tag1,
int tag2)
Compare two author names to see if the keywords from one are completely
contained within the other.
|
private boolean |
matchPartialId(int tag1,
int tag2)
Check if two identifiers match before parentheses
|
private boolean |
matchPartialTitle(int tag1,
int tag2)
Check if one title matches the other without a colon.
|
private boolean |
multiFieldMatch(int doc1,
int doc2)
Compare the fields of two documents to determine if they should be in
the same FRBR group.
|
String |
name(int groupId)
Get the name of a group given its number
|
int |
nChildren(int groupId)
Get the number of children a group has
|
int |
nDocHits(int groupId)
Only called for dynamic data: get count of docs in a group
|
int |
nextLink(int linkId)
Return the ID of the link after the specified one, or -1 if no more
|
int |
nGroups()
Get the total number of groups
|
private void |
outputDisplayKey(String title,
int doc) |
int |
parent(int groupId)
Get the parent of the given group, or -1 if group is the root
|
private int |
parseYear(TagChars chars)
Search characters for a series of 4 digits, and consider that a year.
|
float |
score(int groupId)
Only called for dynamic data: get score of a group
|
private int |
scoreAuthorMatch(IntList list1,
IntList list2)
Score the potential match of two lists of authors.
|
private int |
scoreDateMatch(IntList list1,
IntList list2)
Compare two dates for a match.
|
private int |
scoreIdMatch(IntList list1,
IntList list2)
Score the potential match of two lists of identifiers.
|
private int |
scoreTitleMatch(IntList list1,
IntList list2)
Score the potential match of two lists of titles.
|
int |
sibling(int groupId)
Get the sibling of the given group, or -1 if no more
|
debugGroups
private String params
private FRBRData data
private IntList docs
private int maxDoc
private FloatList docScores
private IntList docGroups
private IntList groupDocs
private IntList groupDocCounts
private FloatList groupScores
private int nGroups
private int primarySort
private boolean reversePrimarySort
private IntList matchTags1
private IntList matchTags2
private TagChars chars1
private TagChars chars2
private int wordHashKey
private static final int WORD_HASH_SIZE
private int[] wordHash
private static final char[] charType
public void init(IndexReader indexReader, Set tokFields, String params) throws IOException
init
in class DynamicGroupData
IOException
public void collect(int doc, float score)
collect
in class DynamicGroupData
doc
- Lucene document identifier for matching document
score
- Calculated score for the doc (always greater than zero)
public void finish()
finish
in class DynamicGroupData
private void findGroup(int mainDoc)
mainDoc
- Document to put into a group
private boolean matchOnTitle(int mainDoc, int mainTitle, int compTitle)
mainDoc
- main document being matched
mainTitle
- main doc's title tag
compTitle
- title tag to compare
private boolean multiFieldMatch(int doc1, int doc2)
doc1
- First document
doc2
- Second document
private void debugFieldMatch(String field, int doc1, int doc2)
private void outputDisplayKey(String title, int doc)
private int scoreTitleMatch(IntList list1, IntList list2)
private boolean matchPartialTitle(int tag1, int tag2)
private int scoreAuthorMatch(IntList list1, IntList list2)
private boolean matchPartialAuthor(int tag1, int tag2)
private int scoreDateMatch(IntList list1, IntList list2)
private int parseYear(TagChars chars)
private int scoreIdMatch(IntList list1, IntList list2)
private boolean matchPartialId(int tag1, int tag2)
public String field()
public String name(int groupId)
GroupData
public int findGroup(String name)
GroupData
public int child(int groupId)
GroupData
public int sibling(int groupId)
GroupData
public int parent(int groupId)
GroupData
public int nChildren(int groupId)
GroupData
public int firstLink(int docId)
GroupData
public int nextLink(int linkId)
GroupData
public int linkGroup(int linkId)
GroupData
public int nGroups()
GroupData
public boolean isDynamic()
GroupData
public int nDocHits(int groupId)
GroupData
public float score(int groupId)
GroupData
public final int compare(int group1, int group2)
GroupData
private String docTitle(int doc)
private int compareField(int type, int doc1, int doc2, boolean reverse)