001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.lucene.demo.facet; 018 019import java.io.IOException; 020import java.util.ArrayList; 021import java.util.List; 022import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 023import org.apache.lucene.document.Document; 024import org.apache.lucene.facet.DrillDownQuery; 025import org.apache.lucene.facet.FacetResult; 026import org.apache.lucene.facet.Facets; 027import org.apache.lucene.facet.FacetsCollector; 028import org.apache.lucene.facet.FacetsCollectorManager; 029import org.apache.lucene.facet.FacetsConfig; 030import org.apache.lucene.facet.LabelAndValue; 031import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction; 032import org.apache.lucene.facet.taxonomy.FloatAssociationFacetField; 033import org.apache.lucene.facet.taxonomy.IntAssociationFacetField; 034import org.apache.lucene.facet.taxonomy.TaxonomyFacetFloatAssociations; 035import org.apache.lucene.facet.taxonomy.TaxonomyFacetIntAssociations; 036import org.apache.lucene.facet.taxonomy.TaxonomyReader; 037import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; 038import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; 039import org.apache.lucene.index.DirectoryReader; 040import org.apache.lucene.index.IndexWriter; 041import org.apache.lucene.index.IndexWriterConfig; 042import org.apache.lucene.index.IndexWriterConfig.OpenMode; 043import org.apache.lucene.search.IndexSearcher; 044import org.apache.lucene.search.MatchAllDocsQuery; 045import org.apache.lucene.store.ByteBuffersDirectory; 046import org.apache.lucene.store.Directory; 047import org.apache.lucene.util.IOUtils; 048 049/** Shows example usage of category associations. */ 050public class AssociationsFacetsExample { 051 052 private final Directory indexDir = new ByteBuffersDirectory(); 053 private final Directory taxoDir = new ByteBuffersDirectory(); 054 private final FacetsConfig config; 055 056 /** Empty constructor */ 057 public AssociationsFacetsExample() { 058 config = new FacetsConfig(); 059 config.setMultiValued("tags", true); 060 config.setIndexFieldName("tags", "$tags"); 061 config.setMultiValued("genre", true); 062 config.setIndexFieldName("genre", "$genre"); 063 } 064 065 /** Build the example index. */ 066 private void index() throws IOException { 067 IndexWriterConfig iwc = 068 new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE); 069 IndexWriter indexWriter = new IndexWriter(indexDir, iwc); 070 071 // Writes facet ords to a separate directory from the main index 072 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); 073 074 Document doc = new Document(); 075 // 3 occurrences for tag 'lucene' 076 doc.add(new IntAssociationFacetField(3, "tags", "lucene")); 077 // 87% confidence level of genre 'computing' 078 doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing")); 079 indexWriter.addDocument(config.build(taxoWriter, doc)); 080 081 doc = new Document(); 082 // 1 occurrence for tag 'lucene' 083 doc.add(new IntAssociationFacetField(1, "tags", "lucene")); 084 // 2 occurrence for tag 'solr' 085 doc.add(new IntAssociationFacetField(2, "tags", "solr")); 086 // 75% confidence level of genre 'computing' 087 doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing")); 088 // 34% confidence level of genre 'software' 089 doc.add(new FloatAssociationFacetField(0.34f, "genre", "software")); 090 indexWriter.addDocument(config.build(taxoWriter, doc)); 091 092 IOUtils.close(indexWriter, taxoWriter); 093 } 094 095 /** User runs a query and aggregates facets by summing their association values. */ 096 private List<FacetResult> sumAssociations() throws IOException { 097 DirectoryReader indexReader = DirectoryReader.open(indexDir); 098 IndexSearcher searcher = new IndexSearcher(indexReader); 099 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); 100 101 // MatchAllDocsQuery is for "browsing" (counts facets 102 // for all non-deleted docs in the index); normally 103 // you'd use a "normal" query: 104 FacetsCollectorManager.FacetsResult facetsResult = 105 FacetsCollectorManager.search( 106 searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager()); 107 FacetsCollector fc = facetsResult.facetsCollector(); 108 109 Facets tags = 110 new TaxonomyFacetIntAssociations( 111 "$tags", taxoReader, config, fc, AssociationAggregationFunction.SUM); 112 Facets genre = 113 new TaxonomyFacetFloatAssociations( 114 "$genre", taxoReader, config, fc, AssociationAggregationFunction.SUM); 115 116 // Retrieve results 117 List<FacetResult> results = new ArrayList<>(); 118 results.add(tags.getTopChildren(10, "tags")); 119 results.add(genre.getTopChildren(10, "genre")); 120 121 IOUtils.close(indexReader, taxoReader); 122 123 return results; 124 } 125 126 /** User drills down on 'tags/solr'. */ 127 private FacetResult drillDown() throws IOException { 128 DirectoryReader indexReader = DirectoryReader.open(indexDir); 129 IndexSearcher searcher = new IndexSearcher(indexReader); 130 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); 131 132 // Passing no baseQuery means we drill down on all 133 // documents ("browse only"): 134 DrillDownQuery q = new DrillDownQuery(config); 135 136 // Now user drills down on Publish Date/2010: 137 q.add("tags", "solr"); 138 FacetsCollectorManager fcm = new FacetsCollectorManager(); 139 FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector(); 140 141 // Retrieve results 142 Facets facets = 143 new TaxonomyFacetFloatAssociations( 144 "$genre", taxoReader, config, fc, AssociationAggregationFunction.SUM); 145 FacetResult result = facets.getTopChildren(10, "genre"); 146 147 IOUtils.close(indexReader, taxoReader); 148 149 return result; 150 } 151 152 /** Runs summing association example. */ 153 public List<FacetResult> runSumAssociations() throws IOException { 154 index(); 155 return sumAssociations(); 156 } 157 158 /** Runs the drill-down example. */ 159 public FacetResult runDrillDown() throws IOException { 160 index(); 161 return drillDown(); 162 } 163 164 /** Runs the sum int/float associations examples and prints the results. */ 165 public static void main(String[] args) throws Exception { 166 System.out.println("Sum associations example:"); 167 System.out.println("-------------------------"); 168 List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations(); 169 System.out.println("tags: " + results.get(0)); 170 System.out.println("genre: " + results.get(1)); 171 System.out.println("-------------------------"); 172 System.out.println("Counts per label are also available:"); 173 for (FacetResult facetResult : results) { 174 for (LabelAndValue lv : facetResult.labelValues) { 175 System.out.println("\t" + lv.label + ": " + lv.count); 176 } 177 } 178 } 179}