001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.text.ParseException;
022import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
023import org.apache.lucene.document.Document;
024import org.apache.lucene.document.DoublePoint;
025import org.apache.lucene.document.NumericDocValuesField;
026import org.apache.lucene.expressions.Expression;
027import org.apache.lucene.expressions.SimpleBindings;
028import org.apache.lucene.expressions.js.JavascriptCompiler;
029import org.apache.lucene.facet.DrillDownQuery;
030import org.apache.lucene.facet.DrillSideways;
031import org.apache.lucene.facet.FacetResult;
032import org.apache.lucene.facet.Facets;
033import org.apache.lucene.facet.FacetsCollector;
034import org.apache.lucene.facet.FacetsCollectorManager;
035import org.apache.lucene.facet.FacetsConfig;
036import org.apache.lucene.facet.range.DoubleRange;
037import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
038import org.apache.lucene.facet.taxonomy.TaxonomyReader;
039import org.apache.lucene.index.DirectoryReader;
040import org.apache.lucene.index.IndexWriter;
041import org.apache.lucene.index.IndexWriterConfig;
042import org.apache.lucene.index.IndexWriterConfig.OpenMode;
043import org.apache.lucene.search.BooleanClause;
044import org.apache.lucene.search.BooleanQuery;
045import org.apache.lucene.search.DoubleValuesSource;
046import org.apache.lucene.search.IndexSearcher;
047import org.apache.lucene.search.MatchAllDocsQuery;
048import org.apache.lucene.search.Query;
049import org.apache.lucene.search.TopDocs;
050import org.apache.lucene.store.ByteBuffersDirectory;
051import org.apache.lucene.store.Directory;
052import org.apache.lucene.util.IOUtils;
053
054/**
055 * Shows simple usage of dynamic range faceting, using the expressions module to calculate distance.
056 */
057public class DistanceFacetsExample implements Closeable {
058
059  final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
060  final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
061  final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
062  final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
063
064  private final Directory indexDir = new ByteBuffersDirectory();
065  private IndexSearcher searcher;
066  private final FacetsConfig config = new FacetsConfig();
067
068  /** The "home" latitude. */
069  public static final double ORIGIN_LATITUDE = 40.7143528;
070
071  /** The "home" longitude. */
072  public static final double ORIGIN_LONGITUDE = -74.0059731;
073
074  /**
075   * Mean radius of the Earth in KM
076   *
077   * <p>NOTE: this is approximate, because the earth is a bit wider at the equator than the poles.
078   * See http://en.wikipedia.org/wiki/Earth_radius
079   */
080  // see http://earth-info.nga.mil/GandG/publications/tr8350.2/wgs84fin.pdf
081  public static final double EARTH_RADIUS_KM = 6_371.0087714;
082
083  /** Empty constructor */
084  public DistanceFacetsExample() {}
085
086  /** Build the example index. */
087  public void index() throws IOException {
088    IndexWriter writer =
089        new IndexWriter(
090            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
091
092    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
093
094    // Add documents with latitude/longitude location:
095    // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields
096    // (for scoring)
097    Document doc = new Document();
098    doc.add(new DoublePoint("latitude", 40.759011));
099    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
100    doc.add(new DoublePoint("longitude", -73.9844722));
101    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
102    writer.addDocument(doc);
103
104    doc = new Document();
105    doc.add(new DoublePoint("latitude", 40.718266));
106    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
107    doc.add(new DoublePoint("longitude", -74.007819));
108    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
109    writer.addDocument(doc);
110
111    doc = new Document();
112    doc.add(new DoublePoint("latitude", 40.7051157));
113    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
114    doc.add(new DoublePoint("longitude", -74.0088305));
115    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
116    writer.addDocument(doc);
117
118    // Open near-real-time searcher
119    searcher = new IndexSearcher(DirectoryReader.open(writer));
120    writer.close();
121  }
122
123  // TODO: Would be nice to augment this example with documents containing multiple "locations",
124  // adding the ability to compute distance facets for the multi-valued case (see LUCENE-10245)
125  private DoubleValuesSource getDistanceValueSource() {
126    Expression distance;
127    try {
128      distance =
129          JavascriptCompiler.compile(
130              "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
131    } catch (ParseException pe) {
132      // Should not happen
133      throw new RuntimeException(pe);
134    }
135    SimpleBindings bindings = new SimpleBindings();
136    bindings.add("latitude", DoubleValuesSource.fromDoubleField("latitude"));
137    bindings.add("longitude", DoubleValuesSource.fromDoubleField("longitude"));
138
139    return distance.getDoubleValuesSource(bindings);
140  }
141
142  /**
143   * Given a latitude and longitude (in degrees) and the maximum great circle (surface of the earth)
144   * distance, returns a simple Filter bounding box to "fast match" candidates.
145   */
146  public static Query getBoundingBoxQuery(
147      double originLat, double originLng, double maxDistanceKM) {
148
149    // Basic bounding box geo math from
150    // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
151    // licensed under creative commons 3.0:
152    // http://creativecommons.org/licenses/by/3.0
153
154    // TODO: maybe switch to recursive prefix tree instead
155    // (in lucene/spatial)?  It should be more efficient
156    // since it's a 2D trie...
157
158    // Degrees -> Radians:
159    double originLatRadians = Math.toRadians(originLat);
160    double originLngRadians = Math.toRadians(originLng);
161
162    double angle = maxDistanceKM / EARTH_RADIUS_KM;
163
164    double minLat = originLatRadians - angle;
165    double maxLat = originLatRadians + angle;
166
167    double minLng;
168    double maxLng;
169    if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
170      double delta = Math.asin(Math.sin(angle) / Math.cos(originLatRadians));
171      minLng = originLngRadians - delta;
172      if (minLng < Math.toRadians(-180)) {
173        minLng += 2 * Math.PI;
174      }
175      maxLng = originLngRadians + delta;
176      if (maxLng > Math.toRadians(180)) {
177        maxLng -= 2 * Math.PI;
178      }
179    } else {
180      // The query includes a pole!
181      minLat = Math.max(minLat, Math.toRadians(-90));
182      maxLat = Math.min(maxLat, Math.toRadians(90));
183      minLng = Math.toRadians(-180);
184      maxLng = Math.toRadians(180);
185    }
186
187    BooleanQuery.Builder f = new BooleanQuery.Builder();
188
189    // Add latitude range filter:
190    f.add(
191        DoublePoint.newRangeQuery("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat)),
192        BooleanClause.Occur.FILTER);
193
194    // Add longitude range filter:
195    if (minLng > maxLng) {
196      // The bounding box crosses the international date
197      // line:
198      BooleanQuery.Builder lonF = new BooleanQuery.Builder();
199      lonF.add(
200          DoublePoint.newRangeQuery("longitude", Math.toDegrees(minLng), Double.POSITIVE_INFINITY),
201          BooleanClause.Occur.SHOULD);
202      lonF.add(
203          DoublePoint.newRangeQuery("longitude", Double.NEGATIVE_INFINITY, Math.toDegrees(maxLng)),
204          BooleanClause.Occur.SHOULD);
205      f.add(lonF.build(), BooleanClause.Occur.MUST);
206    } else {
207      f.add(
208          DoublePoint.newRangeQuery("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng)),
209          BooleanClause.Occur.FILTER);
210    }
211
212    return f.build();
213  }
214
215  /** User runs a query and counts facets. */
216  public FacetResult search() throws IOException {
217
218    FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
219
220    Facets facets =
221        new DoubleRangeFacetCounts(
222            "field",
223            getDistanceValueSource(),
224            fc,
225            getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
226            ONE_KM,
227            TWO_KM,
228            FIVE_KM,
229            TEN_KM);
230
231    return facets.getTopChildren(10, "field");
232  }
233
234  /** User drills down on the specified range. */
235  public TopDocs drillDown(DoubleRange range) throws IOException {
236
237    // Passing no baseQuery means we drill down on all
238    // documents ("browse only"):
239    DrillDownQuery q = new DrillDownQuery(null);
240    final DoubleValuesSource vs = getDistanceValueSource();
241    q.add(
242        "field",
243        range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
244    DrillSideways ds =
245        new DrillSideways(searcher, config, (TaxonomyReader) null) {
246          @Override
247          protected Facets buildFacetsResult(
248              FacetsCollector drillDowns,
249              FacetsCollector[] drillSideways,
250              String[] drillSidewaysDims)
251              throws IOException {
252            assert drillSideways.length == 1;
253            return new DoubleRangeFacetCounts(
254                "field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
255          }
256        };
257    return ds.search(q, 10).hits;
258  }
259
260  @Override
261  public void close() throws IOException {
262    IOUtils.close(searcher.getIndexReader(), indexDir);
263  }
264
265  /** Runs the search and drill-down examples and prints the results. */
266  public static void main(String[] args) throws Exception {
267    DistanceFacetsExample example = new DistanceFacetsExample();
268    example.index();
269
270    System.out.println("Distance facet counting example:");
271    System.out.println("-----------------------");
272    System.out.println(example.search());
273
274    System.out.println("Distance facet drill-down example (field/< 2 km):");
275    System.out.println("---------------------------------------------");
276    TopDocs hits = example.drillDown(example.TWO_KM);
277    System.out.println(hits.totalHits + " totalHits");
278
279    example.close();
280  }
281}