001 package org.apache.lucene.demo.facet;
002
003 import java.io.IOException;
004 import java.util.HashMap;
005 import java.util.List;
006 import java.util.Map;
007
008 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
009 import org.apache.lucene.document.Document;
010 import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest;
011 import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest;
012 import org.apache.lucene.facet.associations.AssociationsFacetFields;
013 import org.apache.lucene.facet.associations.CategoryAssociation;
014 import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
015 import org.apache.lucene.facet.associations.CategoryFloatAssociation;
016 import org.apache.lucene.facet.associations.CategoryIntAssociation;
017 import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator;
018 import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator;
019 import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator;
020 import org.apache.lucene.facet.index.FacetFields;
021 import org.apache.lucene.facet.params.FacetSearchParams;
022 import org.apache.lucene.facet.search.FacetResult;
023 import org.apache.lucene.facet.search.FacetsAccumulator;
024 import org.apache.lucene.facet.search.FacetsAggregator;
025 import org.apache.lucene.facet.search.FacetsCollector;
026 import org.apache.lucene.facet.taxonomy.CategoryPath;
027 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
028 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
029 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
030 import org.apache.lucene.index.DirectoryReader;
031 import org.apache.lucene.index.IndexWriter;
032 import org.apache.lucene.index.IndexWriterConfig;
033 import org.apache.lucene.search.IndexSearcher;
034 import org.apache.lucene.search.MatchAllDocsQuery;
035 import org.apache.lucene.store.Directory;
036 import org.apache.lucene.store.RAMDirectory;
037
038 /*
039 * Licensed to the Apache Software Foundation (ASF) under one or more
040 * contributor license agreements. See the NOTICE file distributed with
041 * this work for additional information regarding copyright ownership.
042 * The ASF licenses this file to You under the Apache License, Version 2.0
043 * (the "License"); you may not use this file except in compliance with
044 * the License. You may obtain a copy of the License at
045 *
046 * http://www.apache.org/licenses/LICENSE-2.0
047 *
048 * Unless required by applicable law or agreed to in writing, software
049 * distributed under the License is distributed on an "AS IS" BASIS,
050 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
051 * See the License for the specific language governing permissions and
052 * limitations under the License.
053 */
054
055 /** Shows example usage of category associations. */
056 public class AssociationsFacetsExample {
057
058 /**
059 * Categories per document, {@link #ASSOCIATIONS} hold the association value
060 * for each category.
061 */
062 public static CategoryPath[][] CATEGORIES = {
063 // Doc #1
064 { new CategoryPath("tags", "lucene") ,
065 new CategoryPath("genre", "computing")
066 },
067
068 // Doc #2
069 { new CategoryPath("tags", "lucene"),
070 new CategoryPath("tags", "solr"),
071 new CategoryPath("genre", "computing"),
072 new CategoryPath("genre", "software")
073 }
074 };
075
076 /** Association values for each category. */
077 public static CategoryAssociation[][] ASSOCIATIONS = {
078 // Doc #1 associations
079 {
080 /* 3 occurrences for tag 'lucene' */
081 new CategoryIntAssociation(3),
082 /* 87% confidence level of genre 'computing' */
083 new CategoryFloatAssociation(0.87f)
084 },
085
086 // Doc #2 associations
087 {
088 /* 1 occurrence for tag 'lucene' */
089 new CategoryIntAssociation(1),
090 /* 2 occurrences for tag 'solr' */
091 new CategoryIntAssociation(2),
092 /* 75% confidence level of genre 'computing' */
093 new CategoryFloatAssociation(0.75f),
094 /* 34% confidence level of genre 'software' */
095 new CategoryFloatAssociation(0.34f),
096 }
097 };
098
099 private final Directory indexDir = new RAMDirectory();
100 private final Directory taxoDir = new RAMDirectory();
101
102 /** Empty constructor */
103 public AssociationsFacetsExample() {}
104
105 /** Build the example index. */
106 private void index() throws IOException {
107 IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
108 new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
109
110 // Writes facet ords to a separate directory from the main index
111 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
112
113 // Reused across documents, to add the necessary facet fields
114 FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
115
116 for (int i = 0; i < CATEGORIES.length; i++) {
117 Document doc = new Document();
118 CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
119 for (int j = 0; j < CATEGORIES[i].length; j++) {
120 associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
121 }
122 facetFields.addFields(doc, associations);
123 indexWriter.addDocument(doc);
124 }
125
126 indexWriter.close();
127 taxoWriter.close();
128 }
129
130 /** User runs a query and aggregates facets by summing their association values. */
131 private List<FacetResult> sumAssociations() throws IOException {
132 DirectoryReader indexReader = DirectoryReader.open(indexDir);
133 IndexSearcher searcher = new IndexSearcher(indexReader);
134 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
135
136 CategoryPath tags = new CategoryPath("tags");
137 CategoryPath genre = new CategoryPath("genre");
138 FacetSearchParams fsp = new FacetSearchParams(
139 new AssociationIntSumFacetRequest(tags, 10),
140 new AssociationFloatSumFacetRequest(genre, 10));
141
142 // every category has a different type of association, so use chain their
143 // respective aggregators.
144 final Map<CategoryPath,FacetsAggregator> aggregators = new HashMap<CategoryPath,FacetsAggregator>();
145 aggregators.put(tags, new SumIntAssociationFacetsAggregator());
146 aggregators.put(genre, new SumFloatAssociationFacetsAggregator());
147 FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) {
148 @Override
149 public FacetsAggregator getAggregator() {
150 return new MultiAssociationsFacetsAggregator(aggregators);
151 }
152 };
153 FacetsCollector fc = FacetsCollector.create(fa);
154
155 // MatchAllDocsQuery is for "browsing" (counts facets
156 // for all non-deleted docs in the index); normally
157 // you'd use a "normal" query, and use MultiCollector to
158 // wrap collecting the "normal" hits and also facets:
159 searcher.search(new MatchAllDocsQuery(), fc);
160
161 // Retrieve results
162 List<FacetResult> facetResults = fc.getFacetResults();
163
164 indexReader.close();
165 taxoReader.close();
166
167 return facetResults;
168 }
169
170 /** Runs summing association example. */
171 public List<FacetResult> runSumAssociations() throws IOException {
172 index();
173 return sumAssociations();
174 }
175
176 /** Runs the sum int/float associations examples and prints the results. */
177 public static void main(String[] args) throws Exception {
178 System.out.println("Sum associations example:");
179 System.out.println("-------------------------");
180 List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations();
181 for (FacetResult res : results) {
182 System.out.println(res);
183 }
184 }
185
186 }