001 package org.apache.lucene.demo.facet; 002 003 import java.io.IOException; 004 import java.util.HashMap; 005 import java.util.List; 006 import java.util.Map; 007 008 import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 009 import org.apache.lucene.document.Document; 010 import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest; 011 import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest; 012 import org.apache.lucene.facet.associations.AssociationsFacetFields; 013 import org.apache.lucene.facet.associations.CategoryAssociation; 014 import org.apache.lucene.facet.associations.CategoryAssociationsContainer; 015 import org.apache.lucene.facet.associations.CategoryFloatAssociation; 016 import org.apache.lucene.facet.associations.CategoryIntAssociation; 017 import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator; 018 import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator; 019 import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator; 020 import org.apache.lucene.facet.index.FacetFields; 021 import org.apache.lucene.facet.params.FacetSearchParams; 022 import org.apache.lucene.facet.search.FacetResult; 023 import org.apache.lucene.facet.search.FacetsAccumulator; 024 import org.apache.lucene.facet.search.FacetsAggregator; 025 import org.apache.lucene.facet.search.FacetsCollector; 026 import org.apache.lucene.facet.taxonomy.CategoryPath; 027 import org.apache.lucene.facet.taxonomy.TaxonomyReader; 028 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; 029 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; 030 import org.apache.lucene.index.DirectoryReader; 031 import org.apache.lucene.index.IndexWriter; 032 import org.apache.lucene.index.IndexWriterConfig; 033 import org.apache.lucene.search.IndexSearcher; 034 import org.apache.lucene.search.MatchAllDocsQuery; 035 import org.apache.lucene.store.Directory; 036 import org.apache.lucene.store.RAMDirectory; 037 038 /* 039 * Licensed to the Apache Software Foundation (ASF) under one or more 040 * contributor license agreements. See the NOTICE file distributed with 041 * this work for additional information regarding copyright ownership. 042 * The ASF licenses this file to You under the Apache License, Version 2.0 043 * (the "License"); you may not use this file except in compliance with 044 * the License. You may obtain a copy of the License at 045 * 046 * http://www.apache.org/licenses/LICENSE-2.0 047 * 048 * Unless required by applicable law or agreed to in writing, software 049 * distributed under the License is distributed on an "AS IS" BASIS, 050 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 051 * See the License for the specific language governing permissions and 052 * limitations under the License. 053 */ 054 055 /** Shows example usage of category associations. */ 056 public class AssociationsFacetsExample { 057 058 /** 059 * Categories per document, {@link #ASSOCIATIONS} hold the association value 060 * for each category. 061 */ 062 public static CategoryPath[][] CATEGORIES = { 063 // Doc #1 064 { new CategoryPath("tags", "lucene") , 065 new CategoryPath("genre", "computing") 066 }, 067 068 // Doc #2 069 { new CategoryPath("tags", "lucene"), 070 new CategoryPath("tags", "solr"), 071 new CategoryPath("genre", "computing"), 072 new CategoryPath("genre", "software") 073 } 074 }; 075 076 /** Association values for each category. */ 077 public static CategoryAssociation[][] ASSOCIATIONS = { 078 // Doc #1 associations 079 { 080 /* 3 occurrences for tag 'lucene' */ 081 new CategoryIntAssociation(3), 082 /* 87% confidence level of genre 'computing' */ 083 new CategoryFloatAssociation(0.87f) 084 }, 085 086 // Doc #2 associations 087 { 088 /* 1 occurrence for tag 'lucene' */ 089 new CategoryIntAssociation(1), 090 /* 2 occurrences for tag 'solr' */ 091 new CategoryIntAssociation(2), 092 /* 75% confidence level of genre 'computing' */ 093 new CategoryFloatAssociation(0.75f), 094 /* 34% confidence level of genre 'software' */ 095 new CategoryFloatAssociation(0.34f), 096 } 097 }; 098 099 private final Directory indexDir = new RAMDirectory(); 100 private final Directory taxoDir = new RAMDirectory(); 101 102 /** Empty constructor */ 103 public AssociationsFacetsExample() {} 104 105 /** Build the example index. */ 106 private void index() throws IOException { 107 IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 108 new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); 109 110 // Writes facet ords to a separate directory from the main index 111 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); 112 113 // Reused across documents, to add the necessary facet fields 114 FacetFields facetFields = new AssociationsFacetFields(taxoWriter); 115 116 for (int i = 0; i < CATEGORIES.length; i++) { 117 Document doc = new Document(); 118 CategoryAssociationsContainer associations = new CategoryAssociationsContainer(); 119 for (int j = 0; j < CATEGORIES[i].length; j++) { 120 associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]); 121 } 122 facetFields.addFields(doc, associations); 123 indexWriter.addDocument(doc); 124 } 125 126 indexWriter.close(); 127 taxoWriter.close(); 128 } 129 130 /** User runs a query and aggregates facets by summing their association values. */ 131 private List<FacetResult> sumAssociations() throws IOException { 132 DirectoryReader indexReader = DirectoryReader.open(indexDir); 133 IndexSearcher searcher = new IndexSearcher(indexReader); 134 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); 135 136 CategoryPath tags = new CategoryPath("tags"); 137 CategoryPath genre = new CategoryPath("genre"); 138 FacetSearchParams fsp = new FacetSearchParams( 139 new AssociationIntSumFacetRequest(tags, 10), 140 new AssociationFloatSumFacetRequest(genre, 10)); 141 142 // every category has a different type of association, so use chain their 143 // respective aggregators. 144 final Map<CategoryPath,FacetsAggregator> aggregators = new HashMap<CategoryPath,FacetsAggregator>(); 145 aggregators.put(tags, new SumIntAssociationFacetsAggregator()); 146 aggregators.put(genre, new SumFloatAssociationFacetsAggregator()); 147 FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) { 148 @Override 149 public FacetsAggregator getAggregator() { 150 return new MultiAssociationsFacetsAggregator(aggregators); 151 } 152 }; 153 FacetsCollector fc = FacetsCollector.create(fa); 154 155 // MatchAllDocsQuery is for "browsing" (counts facets 156 // for all non-deleted docs in the index); normally 157 // you'd use a "normal" query, and use MultiCollector to 158 // wrap collecting the "normal" hits and also facets: 159 searcher.search(new MatchAllDocsQuery(), fc); 160 161 // Retrieve results 162 List<FacetResult> facetResults = fc.getFacetResults(); 163 164 indexReader.close(); 165 taxoReader.close(); 166 167 return facetResults; 168 } 169 170 /** Runs summing association example. */ 171 public List<FacetResult> runSumAssociations() throws IOException { 172 index(); 173 return sumAssociations(); 174 } 175 176 /** Runs the sum int/float associations examples and prints the results. */ 177 public static void main(String[] args) throws Exception { 178 System.out.println("Sum associations example:"); 179 System.out.println("-------------------------"); 180 List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations(); 181 for (FacetResult res : results) { 182 System.out.println(res); 183 } 184 } 185 186 }