001    package org.apache.lucene.demo.facet;
002    
003    import java.io.IOException;
004    import java.util.HashMap;
005    import java.util.List;
006    import java.util.Map;
007    
008    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
009    import org.apache.lucene.document.Document;
010    import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest;
011    import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest;
012    import org.apache.lucene.facet.associations.AssociationsFacetFields;
013    import org.apache.lucene.facet.associations.CategoryAssociation;
014    import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
015    import org.apache.lucene.facet.associations.CategoryFloatAssociation;
016    import org.apache.lucene.facet.associations.CategoryIntAssociation;
017    import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator;
018    import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator;
019    import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator;
020    import org.apache.lucene.facet.index.FacetFields;
021    import org.apache.lucene.facet.params.FacetSearchParams;
022    import org.apache.lucene.facet.search.FacetResult;
023    import org.apache.lucene.facet.search.FacetsAccumulator;
024    import org.apache.lucene.facet.search.FacetsAggregator;
025    import org.apache.lucene.facet.search.FacetsCollector;
026    import org.apache.lucene.facet.taxonomy.CategoryPath;
027    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
028    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
029    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
030    import org.apache.lucene.index.DirectoryReader;
031    import org.apache.lucene.index.IndexWriter;
032    import org.apache.lucene.index.IndexWriterConfig;
033    import org.apache.lucene.search.IndexSearcher;
034    import org.apache.lucene.search.MatchAllDocsQuery;
035    import org.apache.lucene.store.Directory;
036    import org.apache.lucene.store.RAMDirectory;
037    
038    /*
039     * Licensed to the Apache Software Foundation (ASF) under one or more
040     * contributor license agreements.  See the NOTICE file distributed with
041     * this work for additional information regarding copyright ownership.
042     * The ASF licenses this file to You under the Apache License, Version 2.0
043     * (the "License"); you may not use this file except in compliance with
044     * the License.  You may obtain a copy of the License at
045     *
046     *     http://www.apache.org/licenses/LICENSE-2.0
047     *
048     * Unless required by applicable law or agreed to in writing, software
049     * distributed under the License is distributed on an "AS IS" BASIS,
050     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
051     * See the License for the specific language governing permissions and
052     * limitations under the License.
053     */
054    
055    /** Shows example usage of category associations. */
056    public class AssociationsFacetsExample {
057    
058      /**
059       * Categories per document, {@link #ASSOCIATIONS} hold the association value
060       * for each category.
061       */
062      public static CategoryPath[][] CATEGORIES = {
063        // Doc #1
064        { new CategoryPath("tags", "lucene") , 
065          new CategoryPath("genre", "computing")
066        },
067            
068        // Doc #2
069        { new CategoryPath("tags", "lucene"),  
070          new CategoryPath("tags", "solr"),
071          new CategoryPath("genre", "computing"),
072          new CategoryPath("genre", "software")
073        }
074      };
075    
076      /** Association values for each category. */
077      public static CategoryAssociation[][] ASSOCIATIONS = {
078        // Doc #1 associations
079        {
080          /* 3 occurrences for tag 'lucene' */
081          new CategoryIntAssociation(3), 
082          /* 87% confidence level of genre 'computing' */
083          new CategoryFloatAssociation(0.87f)
084        },
085        
086        // Doc #2 associations
087        {
088          /* 1 occurrence for tag 'lucene' */
089          new CategoryIntAssociation(1),
090          /* 2 occurrences for tag 'solr' */
091          new CategoryIntAssociation(2),
092          /* 75% confidence level of genre 'computing' */
093          new CategoryFloatAssociation(0.75f),
094          /* 34% confidence level of genre 'software' */
095          new CategoryFloatAssociation(0.34f),
096        }
097      };
098    
099      private final Directory indexDir = new RAMDirectory();
100      private final Directory taxoDir = new RAMDirectory();
101    
102      /** Empty constructor */
103      public AssociationsFacetsExample() {}
104      
105      /** Build the example index. */
106      private void index() throws IOException {
107        IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
108            new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
109    
110        // Writes facet ords to a separate directory from the main index
111        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
112    
113        // Reused across documents, to add the necessary facet fields
114        FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
115        
116        for (int i = 0; i < CATEGORIES.length; i++) {
117          Document doc = new Document();
118          CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
119          for (int j = 0; j < CATEGORIES[i].length; j++) {
120            associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
121          }
122          facetFields.addFields(doc, associations);
123          indexWriter.addDocument(doc);
124        }
125        
126        indexWriter.close();
127        taxoWriter.close();
128      }
129    
130      /** User runs a query and aggregates facets by summing their association values. */
131      private List<FacetResult> sumAssociations() throws IOException {
132        DirectoryReader indexReader = DirectoryReader.open(indexDir);
133        IndexSearcher searcher = new IndexSearcher(indexReader);
134        TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
135        
136        CategoryPath tags = new CategoryPath("tags");
137        CategoryPath genre = new CategoryPath("genre");
138        FacetSearchParams fsp = new FacetSearchParams(
139            new AssociationIntSumFacetRequest(tags, 10), 
140            new AssociationFloatSumFacetRequest(genre, 10));
141      
142        // every category has a different type of association, so use chain their
143        // respective aggregators.
144        final Map<CategoryPath,FacetsAggregator> aggregators = new HashMap<CategoryPath,FacetsAggregator>();
145        aggregators.put(tags, new SumIntAssociationFacetsAggregator());
146        aggregators.put(genre, new SumFloatAssociationFacetsAggregator());
147        FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) {
148          @Override
149          public FacetsAggregator getAggregator() {
150            return new MultiAssociationsFacetsAggregator(aggregators);
151          }
152        };
153        FacetsCollector fc = FacetsCollector.create(fa);
154        
155        // MatchAllDocsQuery is for "browsing" (counts facets
156        // for all non-deleted docs in the index); normally
157        // you'd use a "normal" query, and use MultiCollector to
158        // wrap collecting the "normal" hits and also facets:
159        searcher.search(new MatchAllDocsQuery(), fc);
160        
161        // Retrieve results
162        List<FacetResult> facetResults = fc.getFacetResults();
163        
164        indexReader.close();
165        taxoReader.close();
166        
167        return facetResults;
168      }
169      
170      /** Runs summing association example. */
171      public List<FacetResult> runSumAssociations() throws IOException {
172        index();
173        return sumAssociations();
174      }
175      
176      /** Runs the sum int/float associations examples and prints the results. */
177      public static void main(String[] args) throws Exception {
178        System.out.println("Sum associations example:");
179        System.out.println("-------------------------");
180        List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations();
181        for (FacetResult res : results) {
182          System.out.println(res);
183        }
184      }
185      
186    }