001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.text.ParseException;
023
024import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
025import org.apache.lucene.document.Document;
026import org.apache.lucene.document.DoubleField;
027import org.apache.lucene.document.Field;
028import org.apache.lucene.document.NumericDocValuesField;
029import org.apache.lucene.expressions.Expression;
030import org.apache.lucene.expressions.SimpleBindings;
031import org.apache.lucene.expressions.js.JavascriptCompiler;
032import org.apache.lucene.facet.DrillDownQuery;
033import org.apache.lucene.facet.DrillSideways;
034import org.apache.lucene.facet.FacetResult;
035import org.apache.lucene.facet.Facets;
036import org.apache.lucene.facet.FacetsCollector;
037import org.apache.lucene.facet.FacetsConfig;
038import org.apache.lucene.facet.range.DoubleRange;
039import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
040import org.apache.lucene.facet.taxonomy.TaxonomyReader;
041import org.apache.lucene.index.DirectoryReader;
042import org.apache.lucene.index.IndexWriter;
043import org.apache.lucene.index.IndexWriterConfig;
044import org.apache.lucene.index.IndexWriterConfig.OpenMode;
045import org.apache.lucene.queries.function.ValueSource;
046import org.apache.lucene.search.BooleanClause;
047import org.apache.lucene.search.BooleanQuery;
048import org.apache.lucene.search.IndexSearcher;
049import org.apache.lucene.search.MatchAllDocsQuery;
050import org.apache.lucene.search.NumericRangeQuery;
051import org.apache.lucene.search.Query;
052import org.apache.lucene.search.SortField;
053import org.apache.lucene.search.TopDocs;
054import org.apache.lucene.store.Directory;
055import org.apache.lucene.store.RAMDirectory;
056import org.apache.lucene.util.SloppyMath;
057
058/** Shows simple usage of dynamic range faceting, using the
059 *  expressions module to calculate distance. */
060public class DistanceFacetsExample implements Closeable {
061
062  final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
063  final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
064  final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
065  final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
066
067  private final Directory indexDir = new RAMDirectory();
068  private IndexSearcher searcher;
069  private final FacetsConfig config = new FacetsConfig();
070
071  /** The "home" latitude. */
072  public final static double ORIGIN_LATITUDE = 40.7143528;
073
074  /** The "home" longitude. */
075  public final static double ORIGIN_LONGITUDE = -74.0059731;
076
077  /** Radius of the Earth in KM
078   *
079   * NOTE: this is approximate, because the earth is a bit
080   * wider at the equator than the poles.  See
081   * http://en.wikipedia.org/wiki/Earth_radius */
082  public final static double EARTH_RADIUS_KM = 6371.01;
083
084  /** Empty constructor */
085  public DistanceFacetsExample() {}
086  
087  /** Build the example index. */
088  public void index() throws IOException {
089    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(
090        new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
091
092    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
093
094    // Add documents with latitude/longitude location:
095    // we index these both as DoubleFields (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
096    Document doc = new Document();
097    doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
098    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
099    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
100    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
101    writer.addDocument(doc);
102    
103    doc = new Document();
104    doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
105    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
106    doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
107    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
108    writer.addDocument(doc);
109    
110    doc = new Document();
111    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
112    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
113    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
114    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
115    writer.addDocument(doc);
116
117    // Open near-real-time searcher
118    searcher = new IndexSearcher(DirectoryReader.open(writer));
119    writer.close();
120  }
121
122  private ValueSource getDistanceValueSource() {
123    Expression distance;
124    try {
125      distance = JavascriptCompiler.compile(
126                  "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
127    } catch (ParseException pe) {
128      // Should not happen
129      throw new RuntimeException(pe);
130    }
131    SimpleBindings bindings = new SimpleBindings();
132    bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
133    bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
134
135    return distance.getValueSource(bindings);
136  }
137
138  /** Given a latitude and longitude (in degrees) and the
139   *  maximum great circle (surface of the earth) distance,
140   *  returns a simple Filter bounding box to "fast match"
141   *  candidates. */
142  public static Query getBoundingBoxQuery(double originLat, double originLng, double maxDistanceKM) {
143
144    // Basic bounding box geo math from
145    // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
146    // licensed under creative commons 3.0:
147    // http://creativecommons.org/licenses/by/3.0
148
149    // TODO: maybe switch to recursive prefix tree instead
150    // (in lucene/spatial)?  It should be more efficient
151    // since it's a 2D trie...
152
153    // Degrees -> Radians:
154    double originLatRadians = Math.toRadians(originLat);
155    double originLngRadians = Math.toRadians(originLng);
156
157    double angle = maxDistanceKM / (SloppyMath.earthDiameter(originLat) / 2.0);
158
159    double minLat = originLatRadians - angle;
160    double maxLat = originLatRadians + angle;
161
162    double minLng;
163    double maxLng;
164    if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
165      double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
166      minLng = originLngRadians - delta;
167      if (minLng < Math.toRadians(-180)) {
168        minLng += 2 * Math.PI;
169      }
170      maxLng = originLngRadians + delta;
171      if (maxLng > Math.toRadians(180)) {
172        maxLng -= 2 * Math.PI;
173      }
174    } else {
175      // The query includes a pole!
176      minLat = Math.max(minLat, Math.toRadians(-90));
177      maxLat = Math.min(maxLat, Math.toRadians(90));
178      minLng = Math.toRadians(-180);
179      maxLng = Math.toRadians(180);
180    }
181
182    BooleanQuery.Builder f = new BooleanQuery.Builder();
183
184    // Add latitude range filter:
185    f.add(NumericRangeQuery.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
186          BooleanClause.Occur.FILTER);
187
188    // Add longitude range filter:
189    if (minLng > maxLng) {
190      // The bounding box crosses the international date
191      // line:
192      BooleanQuery.Builder lonF = new BooleanQuery.Builder();
193      lonF.add(NumericRangeQuery.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
194               BooleanClause.Occur.SHOULD);
195      lonF.add(NumericRangeQuery.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
196               BooleanClause.Occur.SHOULD);
197      f.add(lonF.build(), BooleanClause.Occur.MUST);
198    } else {
199      f.add(NumericRangeQuery.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
200            BooleanClause.Occur.FILTER);
201    }
202
203    return f.build();
204  }
205
206  /** User runs a query and counts facets. */
207  public FacetResult search() throws IOException {
208
209    FacetsCollector fc = new FacetsCollector();
210
211    searcher.search(new MatchAllDocsQuery(), fc);
212
213    Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
214                                               getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
215                                               ONE_KM,
216                                               TWO_KM,
217                                               FIVE_KM,
218                                               TEN_KM);
219
220    return facets.getTopChildren(10, "field");
221  }
222
223  /** User drills down on the specified range. */
224  public TopDocs drillDown(DoubleRange range) throws IOException {
225
226    // Passing no baseQuery means we drill down on all
227    // documents ("browse only"):
228    DrillDownQuery q = new DrillDownQuery(null);
229    final ValueSource vs = getDistanceValueSource();
230    q.add("field", range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
231    DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
232        @Override
233        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {        
234          assert drillSideways.length == 1;
235          return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
236        }
237      };
238    return ds.search(q, 10).hits;
239  }
240
241  @Override
242  public void close() throws IOException {
243    searcher.getIndexReader().close();
244    indexDir.close();
245  }
246
247  /** Runs the search and drill-down examples and prints the results. */
248  public static void main(String[] args) throws Exception {
249    DistanceFacetsExample example = new DistanceFacetsExample();
250    example.index();
251
252    System.out.println("Distance facet counting example:");
253    System.out.println("-----------------------");
254    System.out.println(example.search());
255
256    System.out.println("Distance facet drill-down example (field/< 2 km):");
257    System.out.println("---------------------------------------------");
258    TopDocs hits = example.drillDown(example.TWO_KM);
259    System.out.println(hits.totalHits + " totalHits");
260
261    example.close();
262  }
263}