001package org.apache.lucene.demo.facet; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one or more 005 * contributor license agreements. See the NOTICE file distributed with 006 * this work for additional information regarding copyright ownership. 007 * The ASF licenses this file to You under the Apache License, Version 2.0 008 * (the "License"); you may not use this file except in compliance with 009 * the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.text.ParseException; 023 024import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 025import org.apache.lucene.document.Document; 026import org.apache.lucene.document.DoubleField; 027import org.apache.lucene.document.Field; 028import org.apache.lucene.expressions.Expression; 029import org.apache.lucene.expressions.SimpleBindings; 030import org.apache.lucene.expressions.js.JavascriptCompiler; 031import org.apache.lucene.facet.DrillDownQuery; 032import org.apache.lucene.facet.DrillSideways; 033import org.apache.lucene.facet.FacetResult; 034import org.apache.lucene.facet.Facets; 035import org.apache.lucene.facet.FacetsCollector; 036import org.apache.lucene.facet.FacetsConfig; 037import org.apache.lucene.facet.range.DoubleRange; 038import org.apache.lucene.facet.range.DoubleRangeFacetCounts; 039import org.apache.lucene.facet.taxonomy.TaxonomyReader; 040import org.apache.lucene.index.DirectoryReader; 041import org.apache.lucene.index.IndexWriter; 042import org.apache.lucene.index.IndexWriterConfig; 043import org.apache.lucene.queries.BooleanFilter; 044import org.apache.lucene.queries.function.ValueSource; 045import org.apache.lucene.search.BooleanClause; 046import org.apache.lucene.search.Filter; 047import org.apache.lucene.search.IndexSearcher; 048import org.apache.lucene.search.MatchAllDocsQuery; 049import org.apache.lucene.search.NumericRangeFilter; 050import org.apache.lucene.search.SortField; 051import org.apache.lucene.search.TopDocs; 052import org.apache.lucene.store.Directory; 053import org.apache.lucene.store.RAMDirectory; 054 055 056 057/** Shows simple usage of dynamic range faceting, using the 058 * expressions module to calculate distance. */ 059public class DistanceFacetsExample implements Closeable { 060 061 final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false); 062 final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false); 063 final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false); 064 final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false); 065 066 private final Directory indexDir = new RAMDirectory(); 067 private IndexSearcher searcher; 068 private final FacetsConfig config = new FacetsConfig(); 069 070 /** The "home" latitude. */ 071 public final static double ORIGIN_LATITUDE = 40.7143528; 072 073 /** The "home" longitude. */ 074 public final static double ORIGIN_LONGITUDE = -74.0059731; 075 076 /** Radius of the Earth in KM 077 * 078 * NOTE: this is approximate, because the earth is a bit 079 * wider at the equator than the poles. See 080 * http://en.wikipedia.org/wiki/Earth_radius */ 081 public final static double EARTH_RADIUS_KM = 6371.01; 082 083 /** Empty constructor */ 084 public DistanceFacetsExample() {} 085 086 /** Build the example index. */ 087 public void index() throws IOException { 088 IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 089 new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); 090 091 // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter 092 093 // Add documents with latitude/longitude location: 094 Document doc = new Document(); 095 doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO)); 096 doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO)); 097 writer.addDocument(doc); 098 099 doc = new Document(); 100 doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO)); 101 doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO)); 102 writer.addDocument(doc); 103 104 doc = new Document(); 105 doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO)); 106 doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO)); 107 writer.addDocument(doc); 108 109 // Open near-real-time searcher 110 searcher = new IndexSearcher(DirectoryReader.open(writer, true)); 111 writer.close(); 112 } 113 114 private ValueSource getDistanceValueSource() { 115 Expression distance; 116 try { 117 distance = JavascriptCompiler.compile( 118 "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)"); 119 } catch (ParseException pe) { 120 // Should not happen 121 throw new RuntimeException(pe); 122 } 123 SimpleBindings bindings = new SimpleBindings(); 124 bindings.add(new SortField("latitude", SortField.Type.DOUBLE)); 125 bindings.add(new SortField("longitude", SortField.Type.DOUBLE)); 126 127 return distance.getValueSource(bindings); 128 } 129 130 /** Given a latitude and longitude (in degrees) and the 131 * maximum great circle (surface of the earth) distance, 132 * returns a simple Filter bounding box to "fast match" 133 * candidates. */ 134 public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) { 135 136 // Basic bounding box geo math from 137 // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates, 138 // licensed under creative commons 3.0: 139 // http://creativecommons.org/licenses/by/3.0 140 141 // TODO: maybe switch to recursive prefix tree instead 142 // (in lucene/spatial)? It should be more efficient 143 // since it's a 2D trie... 144 145 // Degrees -> Radians: 146 double originLatRadians = Math.toRadians(originLat); 147 double originLngRadians = Math.toRadians(originLng); 148 149 double angle = maxDistanceKM / EARTH_RADIUS_KM; 150 151 double minLat = originLatRadians - angle; 152 double maxLat = originLatRadians + angle; 153 154 double minLng; 155 double maxLng; 156 if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) { 157 double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians)); 158 minLng = originLngRadians - delta; 159 if (minLng < Math.toRadians(-180)) { 160 minLng += 2 * Math.PI; 161 } 162 maxLng = originLngRadians + delta; 163 if (maxLng > Math.toRadians(180)) { 164 maxLng -= 2 * Math.PI; 165 } 166 } else { 167 // The query includes a pole! 168 minLat = Math.max(minLat, Math.toRadians(-90)); 169 maxLat = Math.min(maxLat, Math.toRadians(90)); 170 minLng = Math.toRadians(-180); 171 maxLng = Math.toRadians(180); 172 } 173 174 BooleanFilter f = new BooleanFilter(); 175 176 // Add latitude range filter: 177 f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true), 178 BooleanClause.Occur.MUST); 179 180 // Add longitude range filter: 181 if (minLng > maxLng) { 182 // The bounding box crosses the international date 183 // line: 184 BooleanFilter lonF = new BooleanFilter(); 185 lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true), 186 BooleanClause.Occur.SHOULD); 187 lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true), 188 BooleanClause.Occur.SHOULD); 189 f.add(lonF, BooleanClause.Occur.MUST); 190 } else { 191 f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true), 192 BooleanClause.Occur.MUST); 193 } 194 195 return f; 196 } 197 198 /** User runs a query and counts facets. */ 199 public FacetResult search() throws IOException { 200 201 FacetsCollector fc = new FacetsCollector(); 202 203 searcher.search(new MatchAllDocsQuery(), fc); 204 205 Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc, 206 getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0), 207 ONE_KM, 208 TWO_KM, 209 FIVE_KM, 210 TEN_KM); 211 212 return facets.getTopChildren(10, "field"); 213 } 214 215 /** User drills down on the specified range. */ 216 public TopDocs drillDown(DoubleRange range) throws IOException { 217 218 // Passing no baseQuery means we drill down on all 219 // documents ("browse only"): 220 DrillDownQuery q = new DrillDownQuery(null); 221 final ValueSource vs = getDistanceValueSource(); 222 q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs)); 223 DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) { 224 @Override 225 protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { 226 assert drillSideways.length == 1; 227 return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM); 228 } 229 }; 230 return ds.search(q, 10).hits; 231 } 232 233 @Override 234 public void close() throws IOException { 235 searcher.getIndexReader().close(); 236 indexDir.close(); 237 } 238 239 /** Runs the search and drill-down examples and prints the results. */ 240 @SuppressWarnings("unchecked") 241 public static void main(String[] args) throws Exception { 242 DistanceFacetsExample example = new DistanceFacetsExample(); 243 example.index(); 244 245 System.out.println("Distance facet counting example:"); 246 System.out.println("-----------------------"); 247 System.out.println(example.search()); 248 249 System.out.println("\n"); 250 System.out.println("Distance facet drill-down example (field/< 2 km):"); 251 System.out.println("---------------------------------------------"); 252 TopDocs hits = example.drillDown(example.TWO_KM); 253 System.out.println(hits.totalHits + " totalHits"); 254 255 example.close(); 256 } 257}