001 package org.apache.lucene.demo.facet;
002
003 /*
004 * Licensed to the Apache Software Foundation (ASF) under one or more
005 * contributor license agreements. See the NOTICE file distributed with
006 * this work for additional information regarding copyright ownership.
007 * The ASF licenses this file to You under the Apache License, Version 2.0
008 * (the "License"); you may not use this file except in compliance with
009 * the License. You may obtain a copy of the License at
010 *
011 * http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020 import java.io.Closeable;
021 import java.io.IOException;
022 import java.text.ParseException;
023
024 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
025 import org.apache.lucene.document.Document;
026 import org.apache.lucene.document.DoubleField;
027 import org.apache.lucene.document.Field;
028 import org.apache.lucene.expressions.Expression;
029 import org.apache.lucene.expressions.SimpleBindings;
030 import org.apache.lucene.expressions.js.JavascriptCompiler;
031 import org.apache.lucene.facet.DrillDownQuery;
032 import org.apache.lucene.facet.DrillSideways;
033 import org.apache.lucene.facet.FacetResult;
034 import org.apache.lucene.facet.Facets;
035 import org.apache.lucene.facet.FacetsCollector;
036 import org.apache.lucene.facet.FacetsConfig;
037 import org.apache.lucene.facet.range.DoubleRange;
038 import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
039 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
040 import org.apache.lucene.index.DirectoryReader;
041 import org.apache.lucene.index.IndexWriter;
042 import org.apache.lucene.index.IndexWriterConfig;
043 import org.apache.lucene.queries.BooleanFilter;
044 import org.apache.lucene.queries.function.ValueSource;
045 import org.apache.lucene.search.BooleanClause;
046 import org.apache.lucene.search.Filter;
047 import org.apache.lucene.search.IndexSearcher;
048 import org.apache.lucene.search.MatchAllDocsQuery;
049 import org.apache.lucene.search.NumericRangeFilter;
050 import org.apache.lucene.search.SortField;
051 import org.apache.lucene.search.TopDocs;
052 import org.apache.lucene.store.Directory;
053 import org.apache.lucene.store.RAMDirectory;
054
055
056
057 /** Shows simple usage of dynamic range faceting, using the
058 * expressions module to calculate distance. */
059 public class DistanceFacetsExample implements Closeable {
060
061 final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
062 final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
063 final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
064 final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
065
066 private final Directory indexDir = new RAMDirectory();
067 private IndexSearcher searcher;
068 private final FacetsConfig config = new FacetsConfig();
069
070 /** The "home" latitude. */
071 public final static double ORIGIN_LATITUDE = 40.7143528;
072
073 /** The "home" longitude. */
074 public final static double ORIGIN_LONGITUDE = -74.0059731;
075
076 /** Radius of the Earth in KM
077 *
078 * NOTE: this is approximate, because the earth is a bit
079 * wider at the equator than the poles. See
080 * http://en.wikipedia.org/wiki/Earth_radius */
081 public final static double EARTH_RADIUS_KM = 6371.01;
082
083 /** Empty constructor */
084 public DistanceFacetsExample() {}
085
086 /** Build the example index. */
087 public void index() throws IOException {
088 IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
089 new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
090
091 // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
092
093 // Add documents with latitude/longitude location:
094 Document doc = new Document();
095 doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
096 doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
097 writer.addDocument(doc);
098
099 doc = new Document();
100 doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
101 doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
102 writer.addDocument(doc);
103
104 doc = new Document();
105 doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
106 doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
107 writer.addDocument(doc);
108
109 // Open near-real-time searcher
110 searcher = new IndexSearcher(DirectoryReader.open(writer, true));
111 writer.close();
112 }
113
114 private ValueSource getDistanceValueSource() {
115 Expression distance;
116 try {
117 distance = JavascriptCompiler.compile(
118 "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
119 } catch (ParseException pe) {
120 // Should not happen
121 throw new RuntimeException(pe);
122 }
123 SimpleBindings bindings = new SimpleBindings();
124 bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
125 bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
126
127 return distance.getValueSource(bindings);
128 }
129
130 /** Given a latitude and longitude (in degrees) and the
131 * maximum great circle (surface of the earth) distance,
132 * returns a simple Filter bounding box to "fast match"
133 * candidates. */
134 public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) {
135
136 // Basic bounding box geo math from
137 // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
138 // licensed under creative commons 3.0:
139 // http://creativecommons.org/licenses/by/3.0
140
141 // TODO: maybe switch to recursive prefix tree instead
142 // (in lucene/spatial)? It should be more efficient
143 // since it's a 2D trie...
144
145 // Degrees -> Radians:
146 double originLatRadians = Math.toRadians(originLat);
147 double originLngRadians = Math.toRadians(originLng);
148
149 double angle = maxDistanceKM / EARTH_RADIUS_KM;
150
151 double minLat = originLatRadians - angle;
152 double maxLat = originLatRadians + angle;
153
154 double minLng;
155 double maxLng;
156 if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
157 double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
158 minLng = originLngRadians - delta;
159 if (minLng < Math.toRadians(-180)) {
160 minLng += 2 * Math.PI;
161 }
162 maxLng = originLngRadians + delta;
163 if (maxLng > Math.toRadians(180)) {
164 maxLng -= 2 * Math.PI;
165 }
166 } else {
167 // The query includes a pole!
168 minLat = Math.max(minLat, Math.toRadians(-90));
169 maxLat = Math.min(maxLat, Math.toRadians(90));
170 minLng = Math.toRadians(-180);
171 maxLng = Math.toRadians(180);
172 }
173
174 BooleanFilter f = new BooleanFilter();
175
176 // Add latitude range filter:
177 f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
178 BooleanClause.Occur.MUST);
179
180 // Add longitude range filter:
181 if (minLng > maxLng) {
182 // The bounding box crosses the international date
183 // line:
184 BooleanFilter lonF = new BooleanFilter();
185 lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
186 BooleanClause.Occur.SHOULD);
187 lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
188 BooleanClause.Occur.SHOULD);
189 f.add(lonF, BooleanClause.Occur.MUST);
190 } else {
191 f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
192 BooleanClause.Occur.MUST);
193 }
194
195 return f;
196 }
197
198 /** User runs a query and counts facets. */
199 public FacetResult search() throws IOException {
200
201 FacetsCollector fc = new FacetsCollector();
202
203 searcher.search(new MatchAllDocsQuery(), fc);
204
205 Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
206 getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
207 ONE_KM,
208 TWO_KM,
209 FIVE_KM,
210 TEN_KM);
211
212 return facets.getTopChildren(10, "field");
213 }
214
215 /** User drills down on the specified range. */
216 public TopDocs drillDown(DoubleRange range) throws IOException {
217
218 // Passing no baseQuery means we drill down on all
219 // documents ("browse only"):
220 DrillDownQuery q = new DrillDownQuery(null);
221 final ValueSource vs = getDistanceValueSource();
222 q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
223 DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
224 @Override
225 protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
226 assert drillSideways.length == 1;
227 return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
228 }
229 };
230 return ds.search(q, 10).hits;
231 }
232
233 @Override
234 public void close() throws IOException {
235 searcher.getIndexReader().close();
236 indexDir.close();
237 }
238
239 /** Runs the search and drill-down examples and prints the results. */
240 @SuppressWarnings("unchecked")
241 public static void main(String[] args) throws Exception {
242 DistanceFacetsExample example = new DistanceFacetsExample();
243 example.index();
244
245 System.out.println("Distance facet counting example:");
246 System.out.println("-----------------------");
247 System.out.println(example.search());
248
249 System.out.println("\n");
250 System.out.println("Distance facet drill-down example (field/< 2 km):");
251 System.out.println("---------------------------------------------");
252 TopDocs hits = example.drillDown(example.TWO_KM);
253 System.out.println(hits.totalHits + " totalHits");
254
255 example.close();
256 }
257 }