001package org.apache.lucene.demo.facet;
002
003import java.io.IOException;
004import java.util.HashMap;
005import java.util.List;
006import java.util.Map;
007
008import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
009import org.apache.lucene.document.Document;
010import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest;
011import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest;
012import org.apache.lucene.facet.associations.AssociationsFacetFields;
013import org.apache.lucene.facet.associations.CategoryAssociation;
014import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
015import org.apache.lucene.facet.associations.CategoryFloatAssociation;
016import org.apache.lucene.facet.associations.CategoryIntAssociation;
017import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator;
018import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator;
019import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator;
020import org.apache.lucene.facet.index.FacetFields;
021import org.apache.lucene.facet.params.FacetSearchParams;
022import org.apache.lucene.facet.search.FacetResult;
023import org.apache.lucene.facet.search.FacetsAccumulator;
024import org.apache.lucene.facet.search.FacetsAggregator;
025import org.apache.lucene.facet.search.FacetsCollector;
026import org.apache.lucene.facet.taxonomy.CategoryPath;
027import org.apache.lucene.facet.taxonomy.TaxonomyReader;
028import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
029import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
030import org.apache.lucene.index.DirectoryReader;
031import org.apache.lucene.index.IndexWriter;
032import org.apache.lucene.index.IndexWriterConfig;
033import org.apache.lucene.search.IndexSearcher;
034import org.apache.lucene.search.MatchAllDocsQuery;
035import org.apache.lucene.store.Directory;
036import org.apache.lucene.store.RAMDirectory;
037
038/*
039 * Licensed to the Apache Software Foundation (ASF) under one or more
040 * contributor license agreements.  See the NOTICE file distributed with
041 * this work for additional information regarding copyright ownership.
042 * The ASF licenses this file to You under the Apache License, Version 2.0
043 * (the "License"); you may not use this file except in compliance with
044 * the License.  You may obtain a copy of the License at
045 *
046 *     http://www.apache.org/licenses/LICENSE-2.0
047 *
048 * Unless required by applicable law or agreed to in writing, software
049 * distributed under the License is distributed on an "AS IS" BASIS,
050 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
051 * See the License for the specific language governing permissions and
052 * limitations under the License.
053 */
054
055/** Shows example usage of category associations. */
056public class AssociationsFacetsExample {
057
058  /**
059   * Categories per document, {@link #ASSOCIATIONS} hold the association value
060   * for each category.
061   */
062  public static CategoryPath[][] CATEGORIES = {
063    // Doc #1
064    { new CategoryPath("tags", "lucene") , 
065      new CategoryPath("genre", "computing")
066    },
067        
068    // Doc #2
069    { new CategoryPath("tags", "lucene"),  
070      new CategoryPath("tags", "solr"),
071      new CategoryPath("genre", "computing"),
072      new CategoryPath("genre", "software")
073    }
074  };
075
076  /** Association values for each category. */
077  public static CategoryAssociation[][] ASSOCIATIONS = {
078    // Doc #1 associations
079    {
080      /* 3 occurrences for tag 'lucene' */
081      new CategoryIntAssociation(3), 
082      /* 87% confidence level of genre 'computing' */
083      new CategoryFloatAssociation(0.87f)
084    },
085    
086    // Doc #2 associations
087    {
088      /* 1 occurrence for tag 'lucene' */
089      new CategoryIntAssociation(1),
090      /* 2 occurrences for tag 'solr' */
091      new CategoryIntAssociation(2),
092      /* 75% confidence level of genre 'computing' */
093      new CategoryFloatAssociation(0.75f),
094      /* 34% confidence level of genre 'software' */
095      new CategoryFloatAssociation(0.34f),
096    }
097  };
098
099  private final Directory indexDir = new RAMDirectory();
100  private final Directory taxoDir = new RAMDirectory();
101
102  /** Empty constructor */
103  public AssociationsFacetsExample() {}
104  
105  /** Build the example index. */
106  private void index() throws IOException {
107    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
108        new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
109
110    // Writes facet ords to a separate directory from the main index
111    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
112
113    // Reused across documents, to add the necessary facet fields
114    FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
115    
116    for (int i = 0; i < CATEGORIES.length; i++) {
117      Document doc = new Document();
118      CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
119      for (int j = 0; j < CATEGORIES[i].length; j++) {
120        associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
121      }
122      facetFields.addFields(doc, associations);
123      indexWriter.addDocument(doc);
124    }
125    
126    indexWriter.close();
127    taxoWriter.close();
128  }
129
130  /** User runs a query and aggregates facets by summing their association values. */
131  private List<FacetResult> sumAssociations() throws IOException {
132    DirectoryReader indexReader = DirectoryReader.open(indexDir);
133    IndexSearcher searcher = new IndexSearcher(indexReader);
134    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
135    
136    CategoryPath tags = new CategoryPath("tags");
137    CategoryPath genre = new CategoryPath("genre");
138    FacetSearchParams fsp = new FacetSearchParams(
139        new AssociationIntSumFacetRequest(tags, 10), 
140        new AssociationFloatSumFacetRequest(genre, 10));
141  
142    // every category has a different type of association, so use chain their
143    // respective aggregators.
144    final Map<CategoryPath,FacetsAggregator> aggregators = new HashMap<CategoryPath,FacetsAggregator>();
145    aggregators.put(tags, new SumIntAssociationFacetsAggregator());
146    aggregators.put(genre, new SumFloatAssociationFacetsAggregator());
147    FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) {
148      @Override
149      public FacetsAggregator getAggregator() {
150        return new MultiAssociationsFacetsAggregator(aggregators);
151      }
152    };
153    FacetsCollector fc = FacetsCollector.create(fa);
154    
155    // MatchAllDocsQuery is for "browsing" (counts facets
156    // for all non-deleted docs in the index); normally
157    // you'd use a "normal" query, and use MultiCollector to
158    // wrap collecting the "normal" hits and also facets:
159    searcher.search(new MatchAllDocsQuery(), fc);
160    
161    // Retrieve results
162    List<FacetResult> facetResults = fc.getFacetResults();
163    
164    indexReader.close();
165    taxoReader.close();
166    
167    return facetResults;
168  }
169  
170  /** Runs summing association example. */
171  public List<FacetResult> runSumAssociations() throws IOException {
172    index();
173    return sumAssociations();
174  }
175  
176  /** Runs the sum int/float associations examples and prints the results. */
177  public static void main(String[] args) throws Exception {
178    System.out.println("Sum associations example:");
179    System.out.println("-------------------------");
180    List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations();
181    for (FacetResult res : results) {
182      System.out.println(res);
183    }
184  }
185  
186}