Class BaseTokenStreamTestCase

java.lang.Object
org.junit.Assert
org.apache.lucene.tests.util.LuceneTestCase
org.apache.lucene.tests.analysis.BaseTokenStreamTestCase
Direct Known Subclasses:
BaseTokenStreamFactoryTestCase

public abstract class BaseTokenStreamTestCase extends LuceneTestCase
Base class for all Lucene unit tests that use TokenStreams.

When writing unit tests for analysis components, it's highly recommended to use the helper methods here (especially in conjunction with MockAnalyzer or MockTokenizer), as they contain many assertions and checks to catch bugs.

See Also: MockAnalyzer, MockTokenizer
  • Constructor Details

    • BaseTokenStreamTestCase

      public BaseTokenStreamTestCase()
  • Method Details

    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, byte[][] payloads, int[] flags, float[] boost) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, byte[][] payloads, int[] flags) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean[] keywordAtts, boolean graphOffsetsAreCorrect) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, float[] boost) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, byte[][] payloads) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean graphOffsetsAreCorrect, float[] boost) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean graphOffsetsAreCorrect) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, float[] boost) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, Integer finalOffset) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, Integer finalOffset) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, Integer finalOffset) throws IOException
      Throws:
      IOException
    • assertTokenStreamContents

      public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, float[] boost) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, boolean graphOffsetsAreCorrect) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, boolean graphOffsetsAreCorrect, byte[][] payloads) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException
      Throws:
      IOException
    • assertAnalyzesToPositions

      public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException
      Throws:
      IOException
    • assertAnalyzesToPositions

      public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] posLengths) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets) throws IOException
      Throws:
      IOException
    • assertAnalyzesTo

      public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) throws IOException
      Throws:
      IOException
    • checkResetException

      public static void checkResetException(Analyzer a, String input) throws IOException
      Throws:
      IOException
    • checkOneTerm

      public static void checkOneTerm(Analyzer a, String input, String expected) throws IOException
      Throws:
      IOException
    • checkRandomData

      public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException
      Utility method for blasting TokenStreams with data to make sure they don't do anything crazy.
      Throws:
      IOException
    • checkRandomData

      public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException
      Utility method for blasting TokenStreams with data to make sure they don't do anything crazy.
      Throws:
      IOException
    • checkRandomData

      public static void checkRandomData(Random random, Analyzer a, int iterations, boolean simple) throws IOException
      Utility method for blasting TokenStreams with data to make sure they don't do anything crazy.
      Parameters:
      simple - true if only ASCII strings will be used (try to avoid this)
      Throws:
      IOException
    • assertStreamHasNumberOfTokens

      public static void assertStreamHasNumberOfTokens(TokenStream ts, int expectedCount) throws IOException
      Asserts that the given stream has the expected number of tokens.
      Throws:
      IOException
    • checkRandomData

      public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple) throws IOException
      Throws:
      IOException
    • checkRandomData

      public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean graphOffsetsAreCorrect) throws IOException
      Throws:
      IOException
    • escape

      public static String escape(String s)
    • checkAnalysisConsistency

      public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text) throws IOException
      Throws:
      IOException
    • checkAnalysisConsistency

      public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text, boolean graphOffsetsAreCorrect) throws IOException
      Throws:
      IOException
    • toDot

      protected String toDot(Analyzer a, String inputText) throws IOException
      Throws:
      IOException
    • toDotFile

      protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException
      Throws:
      IOException
    • whitespaceMockTokenizer

      protected static MockTokenizer whitespaceMockTokenizer(Reader input) throws IOException
      Throws:
      IOException
    • whitespaceMockTokenizer

      protected static MockTokenizer whitespaceMockTokenizer(String input) throws IOException
      Throws:
      IOException
    • keywordMockTokenizer

      protected static MockTokenizer keywordMockTokenizer(Reader input) throws IOException
      Throws:
      IOException
    • keywordMockTokenizer

      protected static MockTokenizer keywordMockTokenizer(String input) throws IOException
      Throws:
      IOException
    • newAttributeFactory

      public static AttributeFactory newAttributeFactory(Random random)
      Returns a random AttributeFactory implementation.
    • newAttributeFactory

      public static AttributeFactory newAttributeFactory()
      Returns a random AttributeFactory implementation.
    • assertGraphStrings

      public static void assertGraphStrings(Analyzer analyzer, String text, String... expectedStrings) throws IOException
      Enumerates all accepted strings in the token graph created by the analyzer on the provided text, and then asserts that it's equal to the expected strings. Uses TokenStreamToAutomaton to create an automaton. Asserts the finite strings of the automaton are all and only the given valid strings.
      Parameters:
      analyzer - analyzer containing the SynonymFilter under test.
      text - text to be analyzed.
      expectedStrings - all expected finite strings.
      Throws:
      IOException
    • assertGraphStrings

      public static void assertGraphStrings(TokenStream tokenStream, String... expectedStrings) throws IOException
      Enumerates all accepted strings in the token graph created by the already initialized TokenStream.
      Throws:
      IOException
    • getGraphStrings

      public static Set<String> getGraphStrings(Analyzer analyzer, String text) throws IOException
      Returns all paths accepted by the token stream graph produced by analyzing text with the provided analyzer. The tokens' CharTermAttribute values are concatenated, separated by spaces.
      Throws:
      IOException
    • getGraphStrings

      public static Set<String> getGraphStrings(TokenStream tokenStream) throws IOException
      Returns all paths accepted by the token stream graph produced by the already initialized TokenStream.
      Throws:
      IOException
    • toString

      public static String toString(Analyzer analyzer, String text) throws IOException
      Returns a String summary of the tokens this analyzer produces on this text.
      Throws:
      IOException