001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.IOException;
020import java.time.LocalDate;
021import java.time.ZoneOffset;
022import java.util.Arrays;
023import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
024import org.apache.lucene.document.Document;
025import org.apache.lucene.document.Field;
026import org.apache.lucene.document.FloatPoint;
027import org.apache.lucene.document.IntPoint;
028import org.apache.lucene.document.LongPoint;
029import org.apache.lucene.document.StringField;
030import org.apache.lucene.facet.FacetResult;
031import org.apache.lucene.facet.Facets;
032import org.apache.lucene.facet.FacetsCollector;
033import org.apache.lucene.facet.FacetsCollectorManager;
034import org.apache.lucene.facet.facetset.DimRange;
035import org.apache.lucene.facet.facetset.ExactFacetSetMatcher;
036import org.apache.lucene.facet.facetset.FacetSet;
037import org.apache.lucene.facet.facetset.FacetSetDecoder;
038import org.apache.lucene.facet.facetset.FacetSetMatcher;
039import org.apache.lucene.facet.facetset.FacetSetsField;
040import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts;
041import org.apache.lucene.facet.facetset.RangeFacetSetMatcher;
042import org.apache.lucene.index.DirectoryReader;
043import org.apache.lucene.index.IndexWriter;
044import org.apache.lucene.index.IndexWriterConfig;
045import org.apache.lucene.index.IndexWriterConfig.OpenMode;
046import org.apache.lucene.search.BooleanClause;
047import org.apache.lucene.search.BooleanQuery;
048import org.apache.lucene.search.IndexSearcher;
049import org.apache.lucene.search.MatchAllDocsQuery;
050import org.apache.lucene.search.Query;
051import org.apache.lucene.search.TermInSetQuery;
052import org.apache.lucene.store.ByteBuffersDirectory;
053import org.apache.lucene.store.Directory;
054import org.apache.lucene.util.BytesRef;
055import org.apache.lucene.util.NumericUtils;
056
057/**
058 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet}
059 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows
060 * how to mix and match dimensions of different types, as well as implementing a custom {@link
061 * FacetSetMatcher}.
062 */
063public class CustomFacetSetExample {
064
065  private static final long MAY_SECOND_2022 = date("2022-05-02");
066  private static final long JUNE_SECOND_2022 = date("2022-06-02");
067  private static final long JULY_SECOND_2022 = date("2022-07-02");
068  private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120);
069  private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100);
070  private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80);
071
072  private final Directory indexDir = new ByteBuffersDirectory();
073
074  /** Empty constructor */
075  public CustomFacetSetExample() {}
076
077  /** Build the example index. */
078  private void index() throws IOException {
079    IndexWriter indexWriter =
080        new IndexWriter(
081            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
082
083    // Every document holds the temperature measures for a City by Date
084
085    Document doc = new Document();
086    doc.add(new StringField("city", "city1", Field.Store.YES));
087    doc.add(
088        FacetSetsField.create(
089            "temperature",
090            new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
091            new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
092            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
093    addFastMatchFields(doc);
094    indexWriter.addDocument(doc);
095
096    doc = new Document();
097    doc.add(new StringField("city", "city2", Field.Store.YES));
098    doc.add(
099        FacetSetsField.create(
100            "temperature",
101            new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
102            new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
103            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
104    addFastMatchFields(doc);
105    indexWriter.addDocument(doc);
106
107    indexWriter.close();
108  }
109
110  private void addFastMatchFields(Document doc) {
111    // day field
112    doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO));
113    doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO));
114    doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO));
115
116    // temp field
117    doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO));
118    doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO));
119    doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO));
120  }
121
122  /** Counting documents which exactly match a given {@link FacetSet}. */
123  private FacetResult exactMatching() throws IOException {
124    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
125      IndexSearcher searcher = new IndexSearcher(indexReader);
126
127      // MatchAllDocsQuery is for "browsing" (counts facets
128      // for all non-deleted docs in the index); normally
129      // you'd use a "normal" query:
130      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
131
132      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
133      Facets facets =
134          new MatchingFacetSetsCounts(
135              "temperature",
136              fc,
137              TemperatureReadingFacetSet::decodeTemperatureReading,
138              new ExactFacetSetMatcher(
139                  "May 2022 (100f)",
140                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
141              new ExactFacetSetMatcher(
142                  "July 2022 (120f)",
143                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
144
145      // Retrieve results
146      return facets.getAllChildren("temperature");
147    }
148  }
149
150  /**
151   * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates
152   * how to use a fast match query to improve the counting efficiency by skipping over documents
153   * which cannot possibly match a set.
154   */
155  private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
156    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
157      IndexSearcher searcher = new IndexSearcher(indexReader);
158
159      // MatchAllDocsQuery is for "browsing" (counts facets
160      // for all non-deleted docs in the index); normally
161      // you'd use a "normal" query:
162      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
163
164      // Match documents whose "day" field is either "May 2022" or "July 2022"
165      Query dateQuery =
166          new TermInSetQuery(
167              "day",
168              Arrays.asList(
169                  new BytesRef(String.valueOf(MAY_SECOND_2022)),
170                  new BytesRef(String.valueOf(JULY_SECOND_2022))));
171      // Match documents whose "temp" field is either "80" or "120" degrees
172      Query temperatureQuery =
173          new TermInSetQuery(
174              "temp",
175              Arrays.asList(
176                  new BytesRef(String.valueOf(HUNDRED_DEGREES)),
177                  new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));
178      // Documents must match both clauses
179      Query fastMatchQuery =
180          new BooleanQuery.Builder()
181              .add(dateQuery, BooleanClause.Occur.MUST)
182              .add(temperatureQuery, BooleanClause.Occur.MUST)
183              .build();
184
185      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
186      Facets facets =
187          new MatchingFacetSetsCounts(
188              "temperature",
189              fc,
190              TemperatureReadingFacetSet::decodeTemperatureReading,
191              fastMatchQuery,
192              new ExactFacetSetMatcher(
193                  "May 2022 (100f)",
194                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
195              new ExactFacetSetMatcher(
196                  "July 2022 (120f)",
197                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
198
199      // Retrieve results
200      return facets.getAllChildren("temperature");
201    }
202  }
203  /** Counting documents which match a certain degrees value for any date. */
204  private FacetResult rangeMatching() throws IOException {
205    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
206      IndexSearcher searcher = new IndexSearcher(indexReader);
207
208      // MatchAllDocsQuery is for "browsing" (counts facets
209      // for all non-deleted docs in the index); normally
210      // you'd use a "normal" query:
211      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
212
213      // Count 80-100 degrees
214      Facets facets =
215          new MatchingFacetSetsCounts(
216              "temperature",
217              fc,
218              TemperatureReadingFacetSet::decodeTemperatureReading,
219              new RangeFacetSetMatcher(
220                  "Eighty to Hundred Degrees",
221                  DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true),
222                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
223
224      // Retrieve results
225      return facets.getAllChildren("temperature");
226    }
227  }
228
229  /**
230   * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link
231   * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature
232   * one).
233   */
234  private FacetResult customRangeMatching() throws IOException {
235    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
236      IndexSearcher searcher = new IndexSearcher(indexReader);
237
238      // MatchAllDocsQuery is for "browsing" (counts facets
239      // for all non-deleted docs in the index); normally
240      // you'd use a "normal" query:
241      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
242
243      // Count 80-100 degrees
244      Facets facets =
245          new MatchingFacetSetsCounts(
246              "temperature",
247              fc,
248              TemperatureReadingFacetSet::decodeTemperatureReading,
249              new TemperatureOnlyFacetSetMatcher(
250                  "Eighty to Hundred Degrees",
251                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
252
253      // Retrieve results
254      return facets.getAllChildren("temperature");
255    }
256  }
257
258  private static long date(String dateString) {
259    return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
260  }
261
262  private static float fahrenheitToCelsius(int degrees) {
263    return (degrees - 32.0f) * 5.f / 9.f;
264  }
265
266  /** Runs the exact matching example. */
267  public FacetResult runExactMatching() throws IOException {
268    index();
269    return exactMatching();
270  }
271
272  /** Runs the exact matching with fast match query example. */
273  public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
274    index();
275    return exactMatchingWithFastMatchQuery();
276  }
277
278  /** Runs the range matching example. */
279  public FacetResult runRangeMatching() throws IOException {
280    index();
281    return rangeMatching();
282  }
283
284  /** Runs the custom range matching example. */
285  public FacetResult runCustomRangeMatching() throws IOException {
286    index();
287    return customRangeMatching();
288  }
289
290  /** Runs the search and drill-down examples and prints the results. */
291  public static void main(String[] args) throws Exception {
292    CustomFacetSetExample example = new CustomFacetSetExample();
293
294    System.out.println("Exact Facet Set matching example:");
295    System.out.println("-----------------------");
296    FacetResult result = example.runExactMatching();
297    System.out.println("Temperature Reading: " + result);
298
299    System.out.println("Exact Facet Set matching with fast match query example:");
300    System.out.println("-----------------------");
301    result = example.runExactMatchingWithFastMatchQuery();
302    System.out.println("Temperature Reading: " + result);
303
304    System.out.println("Range Facet Set matching example:");
305    System.out.println("-----------------------");
306    result = example.runRangeMatching();
307    System.out.println("Temperature Reading: " + result);
308
309    System.out.println("Custom Range Facet Set matching example:");
310    System.out.println("-----------------------");
311    result = example.runCustomRangeMatching();
312    System.out.println("Temperature Reading: " + result);
313  }
314
315  /**
316   * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius;
317   * float).
318   */
319  public static class TemperatureReadingFacetSet extends FacetSet {
320
321    private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES;
322
323    private final long date;
324    private final float degrees;
325
326    /** Constructor */
327    public TemperatureReadingFacetSet(long date, float degrees) {
328      super(2); // We encode two dimensions
329
330      this.date = date;
331      this.degrees = degrees;
332    }
333
334    @Override
335    public long[] getComparableValues() {
336      return new long[] {date, NumericUtils.floatToSortableInt(degrees)};
337    }
338
339    @Override
340    public int packValues(byte[] buf, int start) {
341      LongPoint.encodeDimension(date, buf, start);
342      // Encode 'degrees' as a sortable integer.
343      FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES);
344      return sizePackedBytes();
345    }
346
347    @Override
348    public int sizePackedBytes() {
349      return SIZE_PACKED_BYTES;
350    }
351
352    /**
353     * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link
354     * TemperatureReadingFacetSet}.
355     */
356    public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) {
357      dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start);
358      // Decode the degrees as a sortable integer.
359      dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES);
360      return SIZE_PACKED_BYTES;
361    }
362  }
363
364  /**
365   * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension,
366   * ignoring the date.
367   */
368  public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher {
369
370    private final DimRange temperatureRange;
371
372    /** Constructor */
373    protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) {
374      super(label, 1); // We only evaluate one dimension
375
376      this.temperatureRange = temperatureRange;
377    }
378
379    @Override
380    public boolean matches(long[] dimValues) {
381      return temperatureRange.min <= dimValues[1] && temperatureRange.max >= dimValues[1];
382    }
383  }
384}