001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.lucene.demo.facet; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.text.ParseException; 022import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 023import org.apache.lucene.document.Document; 024import org.apache.lucene.document.DoublePoint; 025import org.apache.lucene.document.NumericDocValuesField; 026import org.apache.lucene.expressions.Expression; 027import org.apache.lucene.expressions.SimpleBindings; 028import org.apache.lucene.expressions.js.JavascriptCompiler; 029import org.apache.lucene.facet.DrillDownQuery; 030import org.apache.lucene.facet.DrillSideways; 031import org.apache.lucene.facet.FacetResult; 032import org.apache.lucene.facet.Facets; 033import org.apache.lucene.facet.FacetsCollector; 034import org.apache.lucene.facet.FacetsCollectorManager; 035import org.apache.lucene.facet.FacetsConfig; 036import org.apache.lucene.facet.range.DoubleRange; 037import org.apache.lucene.facet.range.DoubleRangeFacetCounts; 038import org.apache.lucene.facet.taxonomy.TaxonomyReader; 039import org.apache.lucene.index.DirectoryReader; 040import org.apache.lucene.index.IndexWriter; 041import org.apache.lucene.index.IndexWriterConfig; 042import org.apache.lucene.index.IndexWriterConfig.OpenMode; 043import org.apache.lucene.search.BooleanClause; 044import org.apache.lucene.search.BooleanQuery; 045import org.apache.lucene.search.DoubleValuesSource; 046import org.apache.lucene.search.IndexSearcher; 047import org.apache.lucene.search.MatchAllDocsQuery; 048import org.apache.lucene.search.Query; 049import org.apache.lucene.search.TopDocs; 050import org.apache.lucene.store.ByteBuffersDirectory; 051import org.apache.lucene.store.Directory; 052import org.apache.lucene.util.IOUtils; 053 054/** 055 * Shows simple usage of dynamic range faceting, using the expressions module to calculate distance. 056 */ 057public class DistanceFacetsExample implements Closeable { 058 059 final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false); 060 final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false); 061 final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false); 062 final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false); 063 064 private final Directory indexDir = new ByteBuffersDirectory(); 065 private IndexSearcher searcher; 066 private final FacetsConfig config = new FacetsConfig(); 067 068 /** The "home" latitude. */ 069 public static final double ORIGIN_LATITUDE = 40.7143528; 070 071 /** The "home" longitude. */ 072 public static final double ORIGIN_LONGITUDE = -74.0059731; 073 074 /** 075 * Mean radius of the Earth in KM 076 * 077 * <p>NOTE: this is approximate, because the earth is a bit wider at the equator than the poles. 078 * See http://en.wikipedia.org/wiki/Earth_radius 079 */ 080 // see http://earth-info.nga.mil/GandG/publications/tr8350.2/wgs84fin.pdf 081 public static final double EARTH_RADIUS_KM = 6_371.0087714; 082 083 /** Empty constructor */ 084 public DistanceFacetsExample() {} 085 086 /** Build the example index. */ 087 public void index() throws IOException { 088 IndexWriter writer = 089 new IndexWriter( 090 indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); 091 092 // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter 093 094 // Add documents with latitude/longitude location: 095 // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields 096 // (for scoring) 097 Document doc = new Document(); 098 doc.add(new DoublePoint("latitude", 40.759011)); 099 doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); 100 doc.add(new DoublePoint("longitude", -73.9844722)); 101 doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); 102 writer.addDocument(doc); 103 104 doc = new Document(); 105 doc.add(new DoublePoint("latitude", 40.718266)); 106 doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); 107 doc.add(new DoublePoint("longitude", -74.007819)); 108 doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); 109 writer.addDocument(doc); 110 111 doc = new Document(); 112 doc.add(new DoublePoint("latitude", 40.7051157)); 113 doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); 114 doc.add(new DoublePoint("longitude", -74.0088305)); 115 doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); 116 writer.addDocument(doc); 117 118 // Open near-real-time searcher 119 searcher = new IndexSearcher(DirectoryReader.open(writer)); 120 writer.close(); 121 } 122 123 // TODO: Would be nice to augment this example with documents containing multiple "locations", 124 // adding the ability to compute distance facets for the multi-valued case (see LUCENE-10245) 125 private DoubleValuesSource getDistanceValueSource() { 126 Expression distance; 127 try { 128 distance = 129 JavascriptCompiler.compile( 130 "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)"); 131 } catch (ParseException pe) { 132 // Should not happen 133 throw new RuntimeException(pe); 134 } 135 SimpleBindings bindings = new SimpleBindings(); 136 bindings.add("latitude", DoubleValuesSource.fromDoubleField("latitude")); 137 bindings.add("longitude", DoubleValuesSource.fromDoubleField("longitude")); 138 139 return distance.getDoubleValuesSource(bindings); 140 } 141 142 /** 143 * Given a latitude and longitude (in degrees) and the maximum great circle (surface of the earth) 144 * distance, returns a simple Filter bounding box to "fast match" candidates. 145 */ 146 public static Query getBoundingBoxQuery( 147 double originLat, double originLng, double maxDistanceKM) { 148 149 // Basic bounding box geo math from 150 // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates, 151 // licensed under creative commons 3.0: 152 // http://creativecommons.org/licenses/by/3.0 153 154 // TODO: maybe switch to recursive prefix tree instead 155 // (in lucene/spatial)? It should be more efficient 156 // since it's a 2D trie... 157 158 // Degrees -> Radians: 159 double originLatRadians = Math.toRadians(originLat); 160 double originLngRadians = Math.toRadians(originLng); 161 162 double angle = maxDistanceKM / EARTH_RADIUS_KM; 163 164 double minLat = originLatRadians - angle; 165 double maxLat = originLatRadians + angle; 166 167 double minLng; 168 double maxLng; 169 if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) { 170 double delta = Math.asin(Math.sin(angle) / Math.cos(originLatRadians)); 171 minLng = originLngRadians - delta; 172 if (minLng < Math.toRadians(-180)) { 173 minLng += 2 * Math.PI; 174 } 175 maxLng = originLngRadians + delta; 176 if (maxLng > Math.toRadians(180)) { 177 maxLng -= 2 * Math.PI; 178 } 179 } else { 180 // The query includes a pole! 181 minLat = Math.max(minLat, Math.toRadians(-90)); 182 maxLat = Math.min(maxLat, Math.toRadians(90)); 183 minLng = Math.toRadians(-180); 184 maxLng = Math.toRadians(180); 185 } 186 187 BooleanQuery.Builder f = new BooleanQuery.Builder(); 188 189 // Add latitude range filter: 190 f.add( 191 DoublePoint.newRangeQuery("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat)), 192 BooleanClause.Occur.FILTER); 193 194 // Add longitude range filter: 195 if (minLng > maxLng) { 196 // The bounding box crosses the international date 197 // line: 198 BooleanQuery.Builder lonF = new BooleanQuery.Builder(); 199 lonF.add( 200 DoublePoint.newRangeQuery("longitude", Math.toDegrees(minLng), Double.POSITIVE_INFINITY), 201 BooleanClause.Occur.SHOULD); 202 lonF.add( 203 DoublePoint.newRangeQuery("longitude", Double.NEGATIVE_INFINITY, Math.toDegrees(maxLng)), 204 BooleanClause.Occur.SHOULD); 205 f.add(lonF.build(), BooleanClause.Occur.MUST); 206 } else { 207 f.add( 208 DoublePoint.newRangeQuery("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng)), 209 BooleanClause.Occur.FILTER); 210 } 211 212 return f.build(); 213 } 214 215 /** User runs a query and counts facets. */ 216 public FacetResult search() throws IOException { 217 218 FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager()); 219 220 Facets facets = 221 new DoubleRangeFacetCounts( 222 "field", 223 getDistanceValueSource(), 224 fc, 225 getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0), 226 ONE_KM, 227 TWO_KM, 228 FIVE_KM, 229 TEN_KM); 230 231 return facets.getTopChildren(10, "field"); 232 } 233 234 /** User drills down on the specified range. */ 235 public TopDocs drillDown(DoubleRange range) throws IOException { 236 237 // Passing no baseQuery means we drill down on all 238 // documents ("browse only"): 239 DrillDownQuery q = new DrillDownQuery(null); 240 final DoubleValuesSource vs = getDistanceValueSource(); 241 q.add( 242 "field", 243 range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs)); 244 DrillSideways ds = 245 new DrillSideways(searcher, config, (TaxonomyReader) null) { 246 @Override 247 protected Facets buildFacetsResult( 248 FacetsCollector drillDowns, 249 FacetsCollector[] drillSideways, 250 String[] drillSidewaysDims) 251 throws IOException { 252 assert drillSideways.length == 1; 253 return new DoubleRangeFacetCounts( 254 "field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM); 255 } 256 }; 257 return ds.search(q, 10).hits; 258 } 259 260 @Override 261 public void close() throws IOException { 262 IOUtils.close(searcher.getIndexReader(), indexDir); 263 } 264 265 /** Runs the search and drill-down examples and prints the results. */ 266 public static void main(String[] args) throws Exception { 267 DistanceFacetsExample example = new DistanceFacetsExample(); 268 example.index(); 269 270 System.out.println("Distance facet counting example:"); 271 System.out.println("-----------------------"); 272 System.out.println(example.search()); 273 274 System.out.println("Distance facet drill-down example (field/< 2 km):"); 275 System.out.println("---------------------------------------------"); 276 TopDocs hits = example.drillDown(example.TWO_KM); 277 System.out.println(hits.totalHits + " totalHits"); 278 279 example.close(); 280 } 281}