001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.IOException;
020import java.time.LocalDate;
021import java.time.ZoneOffset;
022import java.util.Arrays;
023import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
024import org.apache.lucene.document.Document;
025import org.apache.lucene.document.Field;
026import org.apache.lucene.document.FloatPoint;
027import org.apache.lucene.document.IntPoint;
028import org.apache.lucene.document.LongPoint;
029import org.apache.lucene.document.StringField;
030import org.apache.lucene.facet.FacetResult;
031import org.apache.lucene.facet.Facets;
032import org.apache.lucene.facet.FacetsCollector;
033import org.apache.lucene.facet.FacetsCollectorManager;
034import org.apache.lucene.facet.facetset.DimRange;
035import org.apache.lucene.facet.facetset.ExactFacetSetMatcher;
036import org.apache.lucene.facet.facetset.FacetSet;
037import org.apache.lucene.facet.facetset.FacetSetDecoder;
038import org.apache.lucene.facet.facetset.FacetSetMatcher;
039import org.apache.lucene.facet.facetset.FacetSetsField;
040import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts;
041import org.apache.lucene.facet.facetset.RangeFacetSetMatcher;
042import org.apache.lucene.index.DirectoryReader;
043import org.apache.lucene.index.IndexWriter;
044import org.apache.lucene.index.IndexWriterConfig;
045import org.apache.lucene.index.IndexWriterConfig.OpenMode;
046import org.apache.lucene.search.BooleanClause;
047import org.apache.lucene.search.BooleanQuery;
048import org.apache.lucene.search.IndexSearcher;
049import org.apache.lucene.search.MatchAllDocsQuery;
050import org.apache.lucene.search.Query;
051import org.apache.lucene.search.TermInSetQuery;
052import org.apache.lucene.store.ByteBuffersDirectory;
053import org.apache.lucene.store.Directory;
054import org.apache.lucene.util.BytesRef;
055import org.apache.lucene.util.NumericUtils;
056
057/**
058 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet}
059 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows
060 * how to mix and match dimensions of different types, as well as implementing a custom {@link
061 * FacetSetMatcher}.
062 */
063public class CustomFacetSetExample {
064
  // Dates used throughout the example. Each value is produced by date(String), i.e. the epoch
  // milliseconds of the start of that day in UTC.
  private static final long MAY_SECOND_2022 = date("2022-05-02");
  private static final long JUNE_SECOND_2022 = date("2022-06-02");
  private static final long JULY_SECOND_2022 = date("2022-07-02");
  // Temperatures used throughout the example. The constant names give the Fahrenheit value; the
  // stored float is whatever fahrenheitToCelsius(int) returns for it.
  private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120);
  private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100);
  private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80);

  // Directory that index() writes to and that every search method opens a reader on.
  private final Directory indexDir = new ByteBuffersDirectory();

  /** Empty constructor */
  public CustomFacetSetExample() {}
076
077  /** Build the example index. */
078  private void index() throws IOException {
079    IndexWriter indexWriter =
080        new IndexWriter(
081            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
082
083    // Every document holds the temperature measures for a City by Date
084
085    Document doc = new Document();
086    doc.add(new StringField("city", "city1", Field.Store.YES));
087    doc.add(
088        FacetSetsField.create(
089            "temperature",
090            new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
091            new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
092            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
093    addFastMatchFields(doc);
094    indexWriter.addDocument(doc);
095
096    doc = new Document();
097    doc.add(new StringField("city", "city2", Field.Store.YES));
098    doc.add(
099        FacetSetsField.create(
100            "temperature",
101            new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
102            new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
103            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
104    addFastMatchFields(doc);
105    indexWriter.addDocument(doc);
106
107    indexWriter.close();
108  }
109
110  private void addFastMatchFields(Document doc) {
111    // day field
112    doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO));
113    doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO));
114    doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO));
115
116    // temp field
117    doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO));
118    doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO));
119    doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO));
120  }
121
122  /** Counting documents which exactly match a given {@link FacetSet}. */
123  private FacetResult exactMatching() throws IOException {
124    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
125      IndexSearcher searcher = new IndexSearcher(indexReader);
126
127      // MatchAllDocsQuery is for "browsing" (counts facets
128      // for all non-deleted docs in the index); normally
129      // you'd use a "normal" query:
130      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
131
132      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
133      Facets facets =
134          new MatchingFacetSetsCounts(
135              "temperature",
136              fc,
137              TemperatureReadingFacetSet::decodeTemperatureReading,
138              new ExactFacetSetMatcher(
139                  "May 2022 (100f)",
140                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
141              new ExactFacetSetMatcher(
142                  "July 2022 (120f)",
143                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
144
145      // Retrieve results
146      return facets.getAllChildren("temperature");
147    }
148  }
149
150  /**
151   * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates
152   * how to use a fast match query to improve the counting efficiency by skipping over documents
153   * which cannot possibly match a set.
154   */
155  private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
156    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
157      IndexSearcher searcher = new IndexSearcher(indexReader);
158
159      // MatchAllDocsQuery is for "browsing" (counts facets
160      // for all non-deleted docs in the index); normally
161      // you'd use a "normal" query:
162      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
163
164      // Match documents whose "day" field is either "May 2022" or "July 2022"
165      Query dateQuery =
166          new TermInSetQuery(
167              "day",
168              Arrays.asList(
169                  new BytesRef(String.valueOf(MAY_SECOND_2022)),
170                  new BytesRef(String.valueOf(JULY_SECOND_2022))));
171      // Match documents whose "temp" field is either "80" or "120" degrees
172      Query temperatureQuery =
173          new TermInSetQuery(
174              "temp",
175              Arrays.asList(
176                  new BytesRef(String.valueOf(HUNDRED_DEGREES)),
177                  new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));
178      // Documents must match both clauses
179      Query fastMatchQuery =
180          new BooleanQuery.Builder()
181              .add(dateQuery, BooleanClause.Occur.MUST)
182              .add(temperatureQuery, BooleanClause.Occur.MUST)
183              .build();
184
185      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
186      Facets facets =
187          new MatchingFacetSetsCounts(
188              "temperature",
189              fc,
190              TemperatureReadingFacetSet::decodeTemperatureReading,
191              fastMatchQuery,
192              new ExactFacetSetMatcher(
193                  "May 2022 (100f)",
194                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
195              new ExactFacetSetMatcher(
196                  "July 2022 (120f)",
197                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
198
199      // Retrieve results
200      return facets.getAllChildren("temperature");
201    }
202  }
203
204  /** Counting documents which match a certain degrees value for any date. */
205  private FacetResult rangeMatching() throws IOException {
206    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
207      IndexSearcher searcher = new IndexSearcher(indexReader);
208
209      // MatchAllDocsQuery is for "browsing" (counts facets
210      // for all non-deleted docs in the index); normally
211      // you'd use a "normal" query:
212      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
213
214      // Count 80-100 degrees
215      Facets facets =
216          new MatchingFacetSetsCounts(
217              "temperature",
218              fc,
219              TemperatureReadingFacetSet::decodeTemperatureReading,
220              new RangeFacetSetMatcher(
221                  "Eighty to Hundred Degrees",
222                  DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true),
223                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
224
225      // Retrieve results
226      return facets.getAllChildren("temperature");
227    }
228  }
229
230  /**
231   * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link
232   * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature
233   * one).
234   */
235  private FacetResult customRangeMatching() throws IOException {
236    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
237      IndexSearcher searcher = new IndexSearcher(indexReader);
238
239      // MatchAllDocsQuery is for "browsing" (counts facets
240      // for all non-deleted docs in the index); normally
241      // you'd use a "normal" query:
242      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
243
244      // Count 80-100 degrees
245      Facets facets =
246          new MatchingFacetSetsCounts(
247              "temperature",
248              fc,
249              TemperatureReadingFacetSet::decodeTemperatureReading,
250              new TemperatureOnlyFacetSetMatcher(
251                  "Eighty to Hundred Degrees",
252                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
253
254      // Retrieve results
255      return facets.getAllChildren("temperature");
256    }
257  }
258
259  private static long date(String dateString) {
260    return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
261  }
262
263  private static float fahrenheitToCelsius(int degrees) {
264    return (degrees - 32.0f) * 5.f / 9.f;
265  }
266
267  /** Runs the exact matching example. */
268  public FacetResult runExactMatching() throws IOException {
269    index();
270    return exactMatching();
271  }
272
273  /** Runs the exact matching with fast match query example. */
274  public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
275    index();
276    return exactMatchingWithFastMatchQuery();
277  }
278
279  /** Runs the range matching example. */
280  public FacetResult runRangeMatching() throws IOException {
281    index();
282    return rangeMatching();
283  }
284
285  /** Runs the custom range matching example. */
286  public FacetResult runCustomRangeMatching() throws IOException {
287    index();
288    return customRangeMatching();
289  }
290
291  /** Runs the search and drill-down examples and prints the results. */
292  public static void main(String[] args) throws Exception {
293    CustomFacetSetExample example = new CustomFacetSetExample();
294
295    System.out.println("Exact Facet Set matching example:");
296    System.out.println("-----------------------");
297    FacetResult result = example.runExactMatching();
298    System.out.println("Temperature Reading: " + result);
299
300    System.out.println("Exact Facet Set matching with fast match query example:");
301    System.out.println("-----------------------");
302    result = example.runExactMatchingWithFastMatchQuery();
303    System.out.println("Temperature Reading: " + result);
304
305    System.out.println("Range Facet Set matching example:");
306    System.out.println("-----------------------");
307    result = example.runRangeMatching();
308    System.out.println("Temperature Reading: " + result);
309
310    System.out.println("Custom Range Facet Set matching example:");
311    System.out.println("-----------------------");
312    result = example.runCustomRangeMatching();
313    System.out.println("Temperature Reading: " + result);
314  }
315
316  /**
317   * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius;
318   * float).
319   */
320  public static class TemperatureReadingFacetSet extends FacetSet {
321
322    private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES;
323
324    private final long date;
325    private final float degrees;
326
327    /** Constructor */
328    public TemperatureReadingFacetSet(long date, float degrees) {
329      super(2); // We encode two dimensions
330
331      this.date = date;
332      this.degrees = degrees;
333    }
334
335    @Override
336    public long[] getComparableValues() {
337      return new long[] {date, NumericUtils.floatToSortableInt(degrees)};
338    }
339
340    @Override
341    public int packValues(byte[] buf, int start) {
342      LongPoint.encodeDimension(date, buf, start);
343      // Encode 'degrees' as a sortable integer.
344      FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES);
345      return sizePackedBytes();
346    }
347
348    @Override
349    public int sizePackedBytes() {
350      return SIZE_PACKED_BYTES;
351    }
352
353    /**
354     * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link
355     * TemperatureReadingFacetSet}.
356     */
357    public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) {
358      dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start);
359      // Decode the degrees as a sortable integer.
360      dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES);
361      return SIZE_PACKED_BYTES;
362    }
363  }
364
365  /**
366   * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension,
367   * ignoring the date.
368   */
369  public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher {
370
371    private final DimRange temperatureRange;
372
373    /** Constructor */
374    protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) {
375      super(label, 1); // We only evaluate one dimension
376
377      this.temperatureRange = temperatureRange;
378    }
379
380    @Override
381    public boolean matches(long[] dimValues) {
382      return temperatureRange.min <= dimValues[1] && temperatureRange.max >= dimValues[1];
383    }
384  }
385}