/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.demo.facet;

import java.io.IOException;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.Arrays;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.facetset.DimRange;
import org.apache.lucene.facet.facetset.ExactFacetSetMatcher;
import org.apache.lucene.facet.facetset.FacetSet;
import org.apache.lucene.facet.facetset.FacetSetDecoder;
import org.apache.lucene.facet.facetset.FacetSetMatcher;
import org.apache.lucene.facet.facetset.FacetSetsField;
import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts;
import org.apache.lucene.facet.facetset.RangeFacetSetMatcher;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

/**
 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet}
 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows
 * how to mix and match dimensions of different types, as well as implementing a custom {@link
 * FacetSetMatcher}.
 */
public class CustomFacetSetExample {

  // Dates are held as epoch milliseconds at the start of the day, UTC (see date(String)).
  private static final long MAY_SECOND_2022 = date("2022-05-02");
  private static final long JUNE_SECOND_2022 = date("2022-06-02");
  private static final long JULY_SECOND_2022 = date("2022-07-02");

  // The constant names refer to the Fahrenheit reading; the stored values are the Celsius
  // equivalents (see fahrenheitToCelsius(int)).
  private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120);
  private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100);
  private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80);

  private final Directory indexDir = new ByteBuffersDirectory();

  /** Empty constructor */
  public CustomFacetSetExample() {}

  /**
   * Build the example index: two "city" documents, each carrying three temperature readings (one
   * per date) in the "temperature" facet sets field, plus the fast match fields.
   *
   * @throws IOException if the index cannot be written
   */
  private void index() throws IOException {
    IndexWriterConfig config =
        new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);

    // try-with-resources so the writer is closed even when adding a document fails; index() is
    // re-run against the same directory by every run*() method.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, config)) {
      // Every document holds the temperature measures for a City by Date

      Document doc = new Document();
      doc.add(new StringField("city", "city1", Field.Store.YES));
      doc.add(
          FacetSetsField.create(
              "temperature",
              new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
              new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
      addFastMatchFields(doc);
      indexWriter.addDocument(doc);

      doc = new Document();
      doc.add(new StringField("city", "city2", Field.Store.YES));
      doc.add(
          FacetSetsField.create(
              "temperature",
              new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
              new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
      addFastMatchFields(doc);
      indexWriter.addDocument(doc);
    }
  }

  /**
   * Adds the un-stored "day" and "temp" string fields which the fast match query searches. Every
   * document receives all three dates and all three temperatures, each written with {@link
   * String#valueOf}, so query terms must be built the same way.
   *
   * @param doc the document to add the fields to
   */
  private void addFastMatchFields(Document doc) {
    // day field
    doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO));
    doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO));
    doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO));

    // temp field
    doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO));
    doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO));
    doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO));
  }

  /**
   * Counting documents which exactly match a given {@link FacetSet}.
   *
   * @return the facet counts for the "temperature" dimension
   * @throws IOException if the index cannot be read
   */
  private FacetResult exactMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      IndexSearcher searcher = new IndexSearcher(indexReader);

      // MatchAllDocsQuery is for "browsing" (counts facets
      // for all non-deleted docs in the index); normally
      // you'd use a "normal" query:
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              new ExactFacetSetMatcher(
                  "May 2022 (100f)",
                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
              new ExactFacetSetMatcher(
                  "July 2022 (120f)",
                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates
   * how to use a fast match query to improve the counting efficiency by skipping over documents
   * which cannot possibly match a set.
   *
   * @return the facet counts for the "temperature" dimension
   * @throws IOException if the index cannot be read
   */
  private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      IndexSearcher searcher = new IndexSearcher(indexReader);

      // MatchAllDocsQuery is for "browsing" (counts facets
      // for all non-deleted docs in the index); normally
      // you'd use a "normal" query:
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Match documents whose "day" field is either "May 2022" or "July 2022"
      Query dateQuery =
          new TermInSetQuery(
              "day",
              Arrays.asList(
                  new BytesRef(String.valueOf(MAY_SECOND_2022)),
                  new BytesRef(String.valueOf(JULY_SECOND_2022))));
      // Match documents whose "temp" field is either "100" or "120" degrees
      Query temperatureQuery =
          new TermInSetQuery(
              "temp",
              Arrays.asList(
                  new BytesRef(String.valueOf(HUNDRED_DEGREES)),
                  new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));
      // Documents must match both clauses
      Query fastMatchQuery =
          new BooleanQuery.Builder()
              .add(dateQuery, BooleanClause.Occur.MUST)
              .add(temperatureQuery, BooleanClause.Occur.MUST)
              .build();

      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              fastMatchQuery,
              new ExactFacetSetMatcher(
                  "May 2022 (100f)",
                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
              new ExactFacetSetMatcher(
                  "July 2022 (120f)",
                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Counting documents which match a certain range of degrees for any date.
   *
   * @return the facet counts for the "temperature" dimension
   * @throws IOException if the index cannot be read
   */
  private FacetResult rangeMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      IndexSearcher searcher = new IndexSearcher(indexReader);

      // MatchAllDocsQuery is for "browsing" (counts facets
      // for all non-deleted docs in the index); normally
      // you'd use a "normal" query:
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Count 80-100 degrees; the date range spans every long value so that any date matches
      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              new RangeFacetSetMatcher(
                  "Eighty to Hundred Degrees",
                  DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true),
                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link
   * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature
   * one).
   *
   * @return the facet counts for the "temperature" dimension
   * @throws IOException if the index cannot be read
   */
  private FacetResult customRangeMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      IndexSearcher searcher = new IndexSearcher(indexReader);

      // MatchAllDocsQuery is for "browsing" (counts facets
      // for all non-deleted docs in the index); normally
      // you'd use a "normal" query:
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Count 80-100 degrees
      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              new TemperatureOnlyFacetSetMatcher(
                  "Eighty to Hundred Degrees",
                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Converts an ISO-8601 local date (e.g. {@code 2022-05-02}) to epoch milliseconds at the start
   * of that day in UTC.
   *
   * @param dateString the date to parse, in the format accepted by {@link LocalDate#parse}
   * @return milliseconds since the epoch for midnight UTC of the given date
   */
  private static long date(String dateString) {
    return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
  }

  /**
   * Converts a temperature from Fahrenheit to Celsius.
   *
   * @param degrees the temperature in degrees Fahrenheit
   * @return the temperature in degrees Celsius
   */
  private static float fahrenheitToCelsius(int degrees) {
    return (degrees - 32.0f) * 5.f / 9.f;
  }

  /** Runs the exact matching example. */
  public FacetResult runExactMatching() throws IOException {
    index();
    return exactMatching();
  }

  /** Runs the exact matching with fast match query example. */
  public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
    index();
    return exactMatchingWithFastMatchQuery();
  }

  /** Runs the range matching example. */
  public FacetResult runRangeMatching() throws IOException {
    index();
    return rangeMatching();
  }

  /** Runs the custom range matching example. */
  public FacetResult runCustomRangeMatching() throws IOException {
    index();
    return customRangeMatching();
  }

  /** Runs the exact, fast match, range and custom range examples and prints the results. */
  public static void main(String[] args) throws Exception {
    CustomFacetSetExample example = new CustomFacetSetExample();

    System.out.println("Exact Facet Set matching example:");
    System.out.println("-----------------------");
    FacetResult result = example.runExactMatching();
    System.out.println("Temperature Reading: " + result);

    System.out.println("Exact Facet Set matching with fast match query example:");
    System.out.println("-----------------------");
    result = example.runExactMatchingWithFastMatchQuery();
    System.out.println("Temperature Reading: " + result);

    System.out.println("Range Facet Set matching example:");
    System.out.println("-----------------------");
    result = example.runRangeMatching();
    System.out.println("Temperature Reading: " + result);

    System.out.println("Custom Range Facet Set matching example:");
    System.out.println("-----------------------");
    result = example.runCustomRangeMatching();
    System.out.println("Temperature Reading: " + result);
  }

  /**
   * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius;
   * float).
   */
  public static class TemperatureReadingFacetSet extends FacetSet {

    private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES;

    private final long date;
    private final float degrees;

    /**
     * Constructor
     *
     * @param date the date of the reading
     * @param degrees the temperature which was read
     */
    public TemperatureReadingFacetSet(long date, float degrees) {
      super(2); // We encode two dimensions

      this.date = date;
      this.degrees = degrees;
    }

    @Override
    public long[] getComparableValues() {
      // Same order as the packed form: date first, then degrees as a sortable int
      return new long[] {date, NumericUtils.floatToSortableInt(degrees)};
    }

    @Override
    public int packValues(byte[] buf, int start) {
      LongPoint.encodeDimension(date, buf, start);
      // Encode 'degrees' as a sortable integer.
      FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES);
      return sizePackedBytes();
    }

    @Override
    public int sizePackedBytes() {
      return SIZE_PACKED_BYTES;
    }

    /**
     * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link
     * TemperatureReadingFacetSet}.
     *
     * @param bytesRef holds the packed bytes to decode
     * @param start the offset in {@code bytesRef.bytes} at which the packed facet set starts
     * @param dest receives the date at index 0 and the degrees, as a sortable int, at index 1
     * @return the number of bytes which were read
     */
    public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) {
      dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start);
      // Decode the degrees as a sortable integer.
      dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES);
      return SIZE_PACKED_BYTES;
    }
  }

  /**
   * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension,
   * ignoring the date.
   */
  public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher {

    private final DimRange temperatureRange;

    /**
     * Constructor
     *
     * @param label the label under which matches are counted
     * @param temperatureRange the range the temperature dimension must fall within
     */
    protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) {
      super(label, 1); // We only evaluate one dimension

      this.temperatureRange = temperatureRange;
    }

    /**
     * Checks only index 1 of {@code dimValues}, which is where {@link
     * TemperatureReadingFacetSet#decodeTemperatureReading} places the degrees; the date at index 0
     * is ignored. Both ends of the range are compared inclusively.
     */
    @Override
    public boolean matches(long[] dimValues) {
      return temperatureRange.min <= dimValues[1] && temperatureRange.max >= dimValues[1];
    }
  }
}