001    package org.apache.lucene.demo.facet;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one or more
005     * contributor license agreements.  See the NOTICE file distributed with
006     * this work for additional information regarding copyright ownership.
007     * The ASF licenses this file to You under the Apache License, Version 2.0
008     * (the "License"); you may not use this file except in compliance with
009     * the License.  You may obtain a copy of the License at
010     *
011     *     http://www.apache.org/licenses/LICENSE-2.0
012     *
013     * Unless required by applicable law or agreed to in writing, software
014     * distributed under the License is distributed on an "AS IS" BASIS,
015     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016     * See the License for the specific language governing permissions and
017     * limitations under the License.
018     */
019    
020    import java.io.Closeable;
021    import java.io.IOException;
022    import java.text.ParseException;
023    
024    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
025    import org.apache.lucene.document.Document;
026    import org.apache.lucene.document.DoubleField;
027    import org.apache.lucene.document.Field;
028    import org.apache.lucene.expressions.Expression;
029    import org.apache.lucene.expressions.SimpleBindings;
030    import org.apache.lucene.expressions.js.JavascriptCompiler;
031    import org.apache.lucene.facet.DrillDownQuery;
032    import org.apache.lucene.facet.DrillSideways;
033    import org.apache.lucene.facet.FacetResult;
034    import org.apache.lucene.facet.Facets;
035    import org.apache.lucene.facet.FacetsCollector;
036    import org.apache.lucene.facet.FacetsConfig;
037    import org.apache.lucene.facet.range.DoubleRange;
038    import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
039    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
040    import org.apache.lucene.index.DirectoryReader;
041    import org.apache.lucene.index.IndexWriter;
042    import org.apache.lucene.index.IndexWriterConfig;
043    import org.apache.lucene.queries.BooleanFilter;
044    import org.apache.lucene.queries.function.ValueSource;
045    import org.apache.lucene.search.BooleanClause;
046    import org.apache.lucene.search.Filter;
047    import org.apache.lucene.search.IndexSearcher;
048    import org.apache.lucene.search.MatchAllDocsQuery;
049    import org.apache.lucene.search.NumericRangeFilter;
050    import org.apache.lucene.search.SortField;
051    import org.apache.lucene.search.TopDocs;
052    import org.apache.lucene.store.Directory;
053    import org.apache.lucene.store.RAMDirectory;
054    
055    
056    
057    /** Shows simple usage of dynamic range faceting, using the
058     *  expressions module to calculate distance. */
059    public class DistanceFacetsExample implements Closeable {
060    
061      final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
062      final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
063      final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
064      final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
065    
066      private final Directory indexDir = new RAMDirectory();
067      private IndexSearcher searcher;
068      private final FacetsConfig config = new FacetsConfig();
069    
070      /** The "home" latitude. */
071      public final static double ORIGIN_LATITUDE = 40.7143528;
072    
073      /** The "home" longitude. */
074      public final static double ORIGIN_LONGITUDE = -74.0059731;
075    
076      /** Radius of the Earth in KM
077       *
078       * NOTE: this is approximate, because the earth is a bit
079       * wider at the equator than the poles.  See
080       * http://en.wikipedia.org/wiki/Earth_radius */
081      public final static double EARTH_RADIUS_KM = 6371.01;
082    
083      /** Empty constructor */
084      public DistanceFacetsExample() {}
085      
086      /** Build the example index. */
087      public void index() throws IOException {
088        IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
089            new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
090    
091        // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
092    
093        // Add documents with latitude/longitude location:
094        Document doc = new Document();
095        doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
096        doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
097        writer.addDocument(doc);
098        
099        doc = new Document();
100        doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
101        doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
102        writer.addDocument(doc);
103        
104        doc = new Document();
105        doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
106        doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
107        writer.addDocument(doc);
108    
109        // Open near-real-time searcher
110        searcher = new IndexSearcher(DirectoryReader.open(writer, true));
111        writer.close();
112      }
113    
114      private ValueSource getDistanceValueSource() {
115        Expression distance;
116        try {
117          distance = JavascriptCompiler.compile(
118                      "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
119        } catch (ParseException pe) {
120          // Should not happen
121          throw new RuntimeException(pe);
122        }
123        SimpleBindings bindings = new SimpleBindings();
124        bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
125        bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
126    
127        return distance.getValueSource(bindings);
128      }
129    
130      /** Given a latitude and longitude (in degrees) and the
131       *  maximum great circle (surface of the earth) distance,
132       *  returns a simple Filter bounding box to "fast match"
133       *  candidates. */
134      public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) {
135    
136        // Basic bounding box geo math from
137        // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
138        // licensed under creative commons 3.0:
139        // http://creativecommons.org/licenses/by/3.0
140    
141        // TODO: maybe switch to recursive prefix tree instead
142        // (in lucene/spatial)?  It should be more efficient
143        // since it's a 2D trie...
144    
145        // Degrees -> Radians:
146        double originLatRadians = Math.toRadians(originLat);
147        double originLngRadians = Math.toRadians(originLng);
148    
149        double angle = maxDistanceKM / EARTH_RADIUS_KM;
150    
151        double minLat = originLatRadians - angle;
152        double maxLat = originLatRadians + angle;
153    
154        double minLng;
155        double maxLng;
156        if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
157          double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
158          minLng = originLngRadians - delta;
159          if (minLng < Math.toRadians(-180)) {
160            minLng += 2 * Math.PI;
161          }
162          maxLng = originLngRadians + delta;
163          if (maxLng > Math.toRadians(180)) {
164            maxLng -= 2 * Math.PI;
165          }
166        } else {
167          // The query includes a pole!
168          minLat = Math.max(minLat, Math.toRadians(-90));
169          maxLat = Math.min(maxLat, Math.toRadians(90));
170          minLng = Math.toRadians(-180);
171          maxLng = Math.toRadians(180);
172        }
173    
174        BooleanFilter f = new BooleanFilter();
175    
176        // Add latitude range filter:
177        f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
178              BooleanClause.Occur.MUST);
179    
180        // Add longitude range filter:
181        if (minLng > maxLng) {
182          // The bounding box crosses the international date
183          // line:
184          BooleanFilter lonF = new BooleanFilter();
185          lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
186                   BooleanClause.Occur.SHOULD);
187          lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
188                   BooleanClause.Occur.SHOULD);
189          f.add(lonF, BooleanClause.Occur.MUST);
190        } else {
191          f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
192                BooleanClause.Occur.MUST);
193        }
194    
195        return f;
196      }
197    
198      /** User runs a query and counts facets. */
199      public FacetResult search() throws IOException {
200    
201        FacetsCollector fc = new FacetsCollector();
202    
203        searcher.search(new MatchAllDocsQuery(), fc);
204    
205        Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
206                                                   getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
207                                                   ONE_KM,
208                                                   TWO_KM,
209                                                   FIVE_KM,
210                                                   TEN_KM);
211    
212        return facets.getTopChildren(10, "field");
213      }
214    
215      /** User drills down on the specified range. */
216      public TopDocs drillDown(DoubleRange range) throws IOException {
217    
218        // Passing no baseQuery means we drill down on all
219        // documents ("browse only"):
220        DrillDownQuery q = new DrillDownQuery(null);
221        final ValueSource vs = getDistanceValueSource();
222        q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
223        DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
224            @Override
225            protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {        
226              assert drillSideways.length == 1;
227              return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
228            }
229          };
230        return ds.search(q, 10).hits;
231      }
232    
233      @Override
234      public void close() throws IOException {
235        searcher.getIndexReader().close();
236        indexDir.close();
237      }
238    
239      /** Runs the search and drill-down examples and prints the results. */
240      @SuppressWarnings("unchecked")
241      public static void main(String[] args) throws Exception {
242        DistanceFacetsExample example = new DistanceFacetsExample();
243        example.index();
244    
245        System.out.println("Distance facet counting example:");
246        System.out.println("-----------------------");
247        System.out.println(example.search());
248    
249        System.out.println("\n");
250        System.out.println("Distance facet drill-down example (field/< 2 km):");
251        System.out.println("---------------------------------------------");
252        TopDocs hits = example.drillDown(example.TWO_KM);
253        System.out.println(hits.totalHits + " totalHits");
254    
255        example.close();
256      }
257    }