001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.text.ParseException;
022
023import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
024import org.apache.lucene.document.Document;
025import org.apache.lucene.document.DoublePoint;
026import org.apache.lucene.document.NumericDocValuesField;
027import org.apache.lucene.expressions.Expression;
028import org.apache.lucene.expressions.SimpleBindings;
029import org.apache.lucene.expressions.js.JavascriptCompiler;
030import org.apache.lucene.facet.DrillDownQuery;
031import org.apache.lucene.facet.DrillSideways;
032import org.apache.lucene.facet.FacetResult;
033import org.apache.lucene.facet.Facets;
034import org.apache.lucene.facet.FacetsCollector;
035import org.apache.lucene.facet.FacetsConfig;
036import org.apache.lucene.facet.range.DoubleRange;
037import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
038import org.apache.lucene.facet.taxonomy.TaxonomyReader;
039import org.apache.lucene.index.DirectoryReader;
040import org.apache.lucene.index.IndexWriter;
041import org.apache.lucene.index.IndexWriterConfig;
042import org.apache.lucene.index.IndexWriterConfig.OpenMode;
043import org.apache.lucene.search.BooleanClause;
044import org.apache.lucene.search.BooleanQuery;
045import org.apache.lucene.search.DoubleValuesSource;
046import org.apache.lucene.search.IndexSearcher;
047import org.apache.lucene.search.MatchAllDocsQuery;
048import org.apache.lucene.search.Query;
049import org.apache.lucene.search.SortField;
050import org.apache.lucene.search.TopDocs;
051import org.apache.lucene.store.Directory;
052import org.apache.lucene.store.RAMDirectory;
053import org.apache.lucene.util.SloppyMath;
054
055/** Shows simple usage of dynamic range faceting, using the
056 *  expressions module to calculate distance. */
057public class DistanceFacetsExample implements Closeable {
058
059  final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
060  final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
061  final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
062  final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
063
064  private final Directory indexDir = new RAMDirectory();
065  private IndexSearcher searcher;
066  private final FacetsConfig config = new FacetsConfig();
067
068  /** The "home" latitude. */
069  public final static double ORIGIN_LATITUDE = 40.7143528;
070
071  /** The "home" longitude. */
072  public final static double ORIGIN_LONGITUDE = -74.0059731;
073
074  /** Mean radius of the Earth in KM
075   *
076   * NOTE: this is approximate, because the earth is a bit
077   * wider at the equator than the poles.  See
078   * http://en.wikipedia.org/wiki/Earth_radius */
079  // see http://earth-info.nga.mil/GandG/publications/tr8350.2/wgs84fin.pdf
080  public final static double EARTH_RADIUS_KM = 6_371.0087714;
081
082  /** Empty constructor */
083  public DistanceFacetsExample() {}
084  
085  /** Build the example index. */
086  public void index() throws IOException {
087    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(
088        new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
089
090    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
091
092    // Add documents with latitude/longitude location:
093    // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
094    Document doc = new Document();
095    doc.add(new DoublePoint("latitude", 40.759011));
096    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
097    doc.add(new DoublePoint("longitude", -73.9844722));
098    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
099    writer.addDocument(doc);
100    
101    doc = new Document();
102    doc.add(new DoublePoint("latitude", 40.718266));
103    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
104    doc.add(new DoublePoint("longitude", -74.007819));
105    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
106    writer.addDocument(doc);
107    
108    doc = new Document();
109    doc.add(new DoublePoint("latitude", 40.7051157));
110    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
111    doc.add(new DoublePoint("longitude", -74.0088305));
112    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
113    writer.addDocument(doc);
114
115    // Open near-real-time searcher
116    searcher = new IndexSearcher(DirectoryReader.open(writer));
117    writer.close();
118  }
119
120  private DoubleValuesSource getDistanceValueSource() {
121    Expression distance;
122    try {
123      distance = JavascriptCompiler.compile(
124                  "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
125    } catch (ParseException pe) {
126      // Should not happen
127      throw new RuntimeException(pe);
128    }
129    SimpleBindings bindings = new SimpleBindings();
130    bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
131    bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
132
133    return distance.getDoubleValuesSource(bindings);
134  }
135
136  /** Given a latitude and longitude (in degrees) and the
137   *  maximum great circle (surface of the earth) distance,
138   *  returns a simple Filter bounding box to "fast match"
139   *  candidates. */
140  public static Query getBoundingBoxQuery(double originLat, double originLng, double maxDistanceKM) {
141
142    // Basic bounding box geo math from
143    // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
144    // licensed under creative commons 3.0:
145    // http://creativecommons.org/licenses/by/3.0
146
147    // TODO: maybe switch to recursive prefix tree instead
148    // (in lucene/spatial)?  It should be more efficient
149    // since it's a 2D trie...
150
151    // Degrees -> Radians:
152    double originLatRadians = SloppyMath.toRadians(originLat);
153    double originLngRadians = SloppyMath.toRadians(originLng);
154
155    double angle = maxDistanceKM / EARTH_RADIUS_KM;
156
157    double minLat = originLatRadians - angle;
158    double maxLat = originLatRadians + angle;
159
160    double minLng;
161    double maxLng;
162    if (minLat > SloppyMath.toRadians(-90) && maxLat < SloppyMath.toRadians(90)) {
163      double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
164      minLng = originLngRadians - delta;
165      if (minLng < SloppyMath.toRadians(-180)) {
166        minLng += 2 * Math.PI;
167      }
168      maxLng = originLngRadians + delta;
169      if (maxLng > SloppyMath.toRadians(180)) {
170        maxLng -= 2 * Math.PI;
171      }
172    } else {
173      // The query includes a pole!
174      minLat = Math.max(minLat, SloppyMath.toRadians(-90));
175      maxLat = Math.min(maxLat, SloppyMath.toRadians(90));
176      minLng = SloppyMath.toRadians(-180);
177      maxLng = SloppyMath.toRadians(180);
178    }
179
180    BooleanQuery.Builder f = new BooleanQuery.Builder();
181
182    // Add latitude range filter:
183    f.add(DoublePoint.newRangeQuery("latitude", SloppyMath.toDegrees(minLat), SloppyMath.toDegrees(maxLat)),
184          BooleanClause.Occur.FILTER);
185
186    // Add longitude range filter:
187    if (minLng > maxLng) {
188      // The bounding box crosses the international date
189      // line:
190      BooleanQuery.Builder lonF = new BooleanQuery.Builder();
191      lonF.add(DoublePoint.newRangeQuery("longitude", SloppyMath.toDegrees(minLng), Double.POSITIVE_INFINITY),
192               BooleanClause.Occur.SHOULD);
193      lonF.add(DoublePoint.newRangeQuery("longitude", Double.NEGATIVE_INFINITY, SloppyMath.toDegrees(maxLng)),
194               BooleanClause.Occur.SHOULD);
195      f.add(lonF.build(), BooleanClause.Occur.MUST);
196    } else {
197      f.add(DoublePoint.newRangeQuery("longitude", SloppyMath.toDegrees(minLng), SloppyMath.toDegrees(maxLng)),
198            BooleanClause.Occur.FILTER);
199    }
200
201    return f.build();
202  }
203
204  /** User runs a query and counts facets. */
205  public FacetResult search() throws IOException {
206
207    FacetsCollector fc = new FacetsCollector();
208
209    searcher.search(new MatchAllDocsQuery(), fc);
210
211    Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
212                                               getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
213                                               ONE_KM,
214                                               TWO_KM,
215                                               FIVE_KM,
216                                               TEN_KM);
217
218    return facets.getTopChildren(10, "field");
219  }
220
221  /** User drills down on the specified range. */
222  public TopDocs drillDown(DoubleRange range) throws IOException {
223
224    // Passing no baseQuery means we drill down on all
225    // documents ("browse only"):
226    DrillDownQuery q = new DrillDownQuery(null);
227    final DoubleValuesSource vs = getDistanceValueSource();
228    q.add("field", range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
229    DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
230        @Override
231        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {        
232          assert drillSideways.length == 1;
233          return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
234        }
235      };
236    return ds.search(q, 10).hits;
237  }
238
239  @Override
240  public void close() throws IOException {
241    searcher.getIndexReader().close();
242    indexDir.close();
243  }
244
245  /** Runs the search and drill-down examples and prints the results. */
246  public static void main(String[] args) throws Exception {
247    DistanceFacetsExample example = new DistanceFacetsExample();
248    example.index();
249
250    System.out.println("Distance facet counting example:");
251    System.out.println("-----------------------");
252    System.out.println(example.search());
253
254    System.out.println("Distance facet drill-down example (field/< 2 km):");
255    System.out.println("---------------------------------------------");
256    TopDocs hits = example.drillDown(example.TWO_KM);
257    System.out.println(hits.totalHits + " totalHits");
258
259    example.close();
260  }
261}