001    package org.apache.lucene.demo.facet;
002    
003    import java.io.IOException;
004    import java.util.List;
005    
006    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
007    import org.apache.lucene.document.Document;
008    import org.apache.lucene.facet.associations.AssociationsFacetFields;
009    import org.apache.lucene.facet.associations.CategoryAssociation;
010    import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
011    import org.apache.lucene.facet.associations.CategoryFloatAssociation;
012    import org.apache.lucene.facet.associations.CategoryIntAssociation;
013    import org.apache.lucene.facet.associations.SumFloatAssociationFacetRequest;
014    import org.apache.lucene.facet.associations.SumIntAssociationFacetRequest;
015    import org.apache.lucene.facet.index.FacetFields;
016    import org.apache.lucene.facet.params.FacetSearchParams;
017    import org.apache.lucene.facet.search.FacetResult;
018    import org.apache.lucene.facet.search.FacetsCollector;
019    import org.apache.lucene.facet.taxonomy.CategoryPath;
020    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
021    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
022    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
023    import org.apache.lucene.index.DirectoryReader;
024    import org.apache.lucene.index.IndexWriter;
025    import org.apache.lucene.index.IndexWriterConfig;
026    import org.apache.lucene.search.IndexSearcher;
027    import org.apache.lucene.search.MatchAllDocsQuery;
028    import org.apache.lucene.store.Directory;
029    import org.apache.lucene.store.RAMDirectory;
030    
031    /*
032     * Licensed to the Apache Software Foundation (ASF) under one or more
033     * contributor license agreements.  See the NOTICE file distributed with
034     * this work for additional information regarding copyright ownership.
035     * The ASF licenses this file to You under the Apache License, Version 2.0
036     * (the "License"); you may not use this file except in compliance with
037     * the License.  You may obtain a copy of the License at
038     *
039     *     http://www.apache.org/licenses/LICENSE-2.0
040     *
041     * Unless required by applicable law or agreed to in writing, software
042     * distributed under the License is distributed on an "AS IS" BASIS,
043     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
044     * See the License for the specific language governing permissions and
045     * limitations under the License.
046     */
047    
048    /** Shows example usage of category associations. */
049    public class AssociationsFacetsExample {
050    
051      /**
052       * Categories per document, {@link #ASSOCIATIONS} hold the association value
053       * for each category.
054       */
055      public static CategoryPath[][] CATEGORIES = {
056        // Doc #1
057        { new CategoryPath("tags", "lucene") , 
058          new CategoryPath("genre", "computing")
059        },
060            
061        // Doc #2
062        { new CategoryPath("tags", "lucene"),  
063          new CategoryPath("tags", "solr"),
064          new CategoryPath("genre", "computing"),
065          new CategoryPath("genre", "software")
066        }
067      };
068    
069      /** Association values for each category. */
070      public static CategoryAssociation[][] ASSOCIATIONS = {
071        // Doc #1 associations
072        {
073          /* 3 occurrences for tag 'lucene' */
074          new CategoryIntAssociation(3), 
075          /* 87% confidence level of genre 'computing' */
076          new CategoryFloatAssociation(0.87f)
077        },
078        
079        // Doc #2 associations
080        {
081          /* 1 occurrence for tag 'lucene' */
082          new CategoryIntAssociation(1),
083          /* 2 occurrences for tag 'solr' */
084          new CategoryIntAssociation(2),
085          /* 75% confidence level of genre 'computing' */
086          new CategoryFloatAssociation(0.75f),
087          /* 34% confidence level of genre 'software' */
088          new CategoryFloatAssociation(0.34f),
089        }
090      };
091    
092      private final Directory indexDir = new RAMDirectory();
093      private final Directory taxoDir = new RAMDirectory();
094    
095      /** Empty constructor */
096      public AssociationsFacetsExample() {}
097      
098      /** Build the example index. */
099      private void index() throws IOException {
100        IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
101            new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
102    
103        // Writes facet ords to a separate directory from the main index
104        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
105    
106        // Reused across documents, to add the necessary facet fields
107        FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
108        
109        for (int i = 0; i < CATEGORIES.length; i++) {
110          Document doc = new Document();
111          CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
112          for (int j = 0; j < CATEGORIES[i].length; j++) {
113            associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
114          }
115          facetFields.addFields(doc, associations);
116          indexWriter.addDocument(doc);
117        }
118        
119        indexWriter.close();
120        taxoWriter.close();
121      }
122    
123      /** User runs a query and aggregates facets by summing their association values. */
124      private List<FacetResult> sumAssociations() throws IOException {
125        DirectoryReader indexReader = DirectoryReader.open(indexDir);
126        IndexSearcher searcher = new IndexSearcher(indexReader);
127        TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
128        
129        CategoryPath tags = new CategoryPath("tags");
130        CategoryPath genre = new CategoryPath("genre");
131        FacetSearchParams fsp = new FacetSearchParams(new SumIntAssociationFacetRequest(tags, 10), 
132            new SumFloatAssociationFacetRequest(genre, 10));
133        FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
134        
135        // MatchAllDocsQuery is for "browsing" (counts facets
136        // for all non-deleted docs in the index); normally
137        // you'd use a "normal" query, and use MultiCollector to
138        // wrap collecting the "normal" hits and also facets:
139        searcher.search(new MatchAllDocsQuery(), fc);
140        
141        // Retrieve results
142        List<FacetResult> facetResults = fc.getFacetResults();
143        
144        indexReader.close();
145        taxoReader.close();
146        
147        return facetResults;
148      }
149      
150      /** Runs summing association example. */
151      public List<FacetResult> runSumAssociations() throws IOException {
152        index();
153        return sumAssociations();
154      }
155      
156      /** Runs the sum int/float associations examples and prints the results. */
157      public static void main(String[] args) throws Exception {
158        System.out.println("Sum associations example:");
159        System.out.println("-------------------------");
160        List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations();
161        for (FacetResult res : results) {
162          System.out.println(res);
163        }
164      }
165      
166    }