public class DocIndexerTei extends DocIndexerTeiBase
Nested classes inherited from DocIndexerXmlHandlers: DocIndexerXmlHandlers.DefaultWordHandler, DocIndexerXmlHandlers.DocumentElementHandler, DocIndexerXmlHandlers.InlineTagHandler, DocIndexerXmlHandlers.MetadataAttributesHandler, DocIndexerXmlHandlers.MetadataElementHandler, DocIndexerXmlHandlers.MetadataNameValueAttributeHandler, DocIndexerXmlHandlers.WordHandlerBase, DocIndexerXmlHandlers.WordInAttributeHandler
Inherited fields (first superclass group): nDocumentsSkipped, reader, skippingCurrentDocument, wordsDone
Inherited fields (second superclass group): currentLuceneDoc, documentName, docWriter, logger, MAX_DOCVALUES_LENGTH, metadataFieldValues, omitNorms, parameters
| Constructor and Description |
|---|
| DocIndexerTei(DocWriter indexer, String fileName, Reader reader) |
| DocIndexerTei(DocWriter indexer, String fileName, Reader reader, String contentElement) |
| Modifier and Type | Method and Description |
|---|---|
| static String | getDescription() |
| static String | getDisplayName() |
Inherited methods (grouped by declaring superclass, nearest first):
- init
- addAnnotation, addAnnotation, addAnnotation, addHandler, addHandler, characters, consumeCharacterContent, describePosition, endElement, endPrefixMapping, getWordPosition, index, mainAnnotatedField, mainAnnotation, processingInstruction, punctAnnotation, registerContentsField, startElement, startPrefixMapping, tagAnnotation
- appendContent, appendContent, close, getCharacterPosition, getDescription, getDisplayName, isVisible, processContent, processContent, reportCharsProcessed, reportTokensProcessed, setDocument, startCaptureContent, storeCapturedContent, storePartCapturedContent
- addMetadataField, addMetadataFieldsFromParameters, addMetadataToDocument, addNumericFields, addToForwardIndex, getCurrentLuceneDoc, getDocWriter, getMetadataField, getMetadataFieldTypeFromIndexerProperties, getParameter, getParameter, getParameter, getParameter, getSensitivitySetting, hasParameter, luceneTypeFromIndexMetadataType, optTranslateFieldName, setDocument, setDocument, setDocument, setDocumentName, setDocWriter, setOmitNorms, setParameter, setParameters, tokenizeField, warn
Copyright © 2020 Instituut voor Nederlandse Taal (INT). All rights reserved.