001package org.apache.lucene.demo.facet;
002
003import java.io.IOException;
004import java.util.List;
005
006import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
007import org.apache.lucene.document.Document;
008import org.apache.lucene.facet.associations.AssociationsFacetFields;
009import org.apache.lucene.facet.associations.CategoryAssociation;
010import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
011import org.apache.lucene.facet.associations.CategoryFloatAssociation;
012import org.apache.lucene.facet.associations.CategoryIntAssociation;
013import org.apache.lucene.facet.associations.SumFloatAssociationFacetRequest;
014import org.apache.lucene.facet.associations.SumIntAssociationFacetRequest;
015import org.apache.lucene.facet.index.FacetFields;
016import org.apache.lucene.facet.params.FacetSearchParams;
017import org.apache.lucene.facet.search.FacetResult;
018import org.apache.lucene.facet.search.FacetsCollector;
019import org.apache.lucene.facet.taxonomy.CategoryPath;
020import org.apache.lucene.facet.taxonomy.TaxonomyReader;
021import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
022import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
023import org.apache.lucene.index.DirectoryReader;
024import org.apache.lucene.index.IndexWriter;
025import org.apache.lucene.index.IndexWriterConfig;
026import org.apache.lucene.search.IndexSearcher;
027import org.apache.lucene.search.MatchAllDocsQuery;
028import org.apache.lucene.store.Directory;
029import org.apache.lucene.store.RAMDirectory;
030
031/*
032 * Licensed to the Apache Software Foundation (ASF) under one or more
033 * contributor license agreements.  See the NOTICE file distributed with
034 * this work for additional information regarding copyright ownership.
035 * The ASF licenses this file to You under the Apache License, Version 2.0
036 * (the "License"); you may not use this file except in compliance with
037 * the License.  You may obtain a copy of the License at
038 *
039 *     http://www.apache.org/licenses/LICENSE-2.0
040 *
041 * Unless required by applicable law or agreed to in writing, software
042 * distributed under the License is distributed on an "AS IS" BASIS,
043 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
044 * See the License for the specific language governing permissions and
045 * limitations under the License.
046 */
047
048/** Shows example usage of category associations. */
049public class AssociationsFacetsExample {
050
051  /**
052   * Categories per document, {@link #ASSOCIATIONS} hold the association value
053   * for each category.
054   */
055  public static CategoryPath[][] CATEGORIES = {
056    // Doc #1
057    { new CategoryPath("tags", "lucene") , 
058      new CategoryPath("genre", "computing")
059    },
060        
061    // Doc #2
062    { new CategoryPath("tags", "lucene"),  
063      new CategoryPath("tags", "solr"),
064      new CategoryPath("genre", "computing"),
065      new CategoryPath("genre", "software")
066    }
067  };
068
069  /** Association values for each category. */
070  public static CategoryAssociation[][] ASSOCIATIONS = {
071    // Doc #1 associations
072    {
073      /* 3 occurrences for tag 'lucene' */
074      new CategoryIntAssociation(3), 
075      /* 87% confidence level of genre 'computing' */
076      new CategoryFloatAssociation(0.87f)
077    },
078    
079    // Doc #2 associations
080    {
081      /* 1 occurrence for tag 'lucene' */
082      new CategoryIntAssociation(1),
083      /* 2 occurrences for tag 'solr' */
084      new CategoryIntAssociation(2),
085      /* 75% confidence level of genre 'computing' */
086      new CategoryFloatAssociation(0.75f),
087      /* 34% confidence level of genre 'software' */
088      new CategoryFloatAssociation(0.34f),
089    }
090  };
091
092  private final Directory indexDir = new RAMDirectory();
093  private final Directory taxoDir = new RAMDirectory();
094
095  /** Empty constructor */
096  public AssociationsFacetsExample() {}
097  
098  /** Build the example index. */
099  private void index() throws IOException {
100    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
101        new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
102
103    // Writes facet ords to a separate directory from the main index
104    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
105
106    // Reused across documents, to add the necessary facet fields
107    FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
108    
109    for (int i = 0; i < CATEGORIES.length; i++) {
110      Document doc = new Document();
111      CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
112      for (int j = 0; j < CATEGORIES[i].length; j++) {
113        associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
114      }
115      facetFields.addFields(doc, associations);
116      indexWriter.addDocument(doc);
117    }
118    
119    indexWriter.close();
120    taxoWriter.close();
121  }
122
123  /** User runs a query and aggregates facets by summing their association values. */
124  private List<FacetResult> sumAssociations() throws IOException {
125    DirectoryReader indexReader = DirectoryReader.open(indexDir);
126    IndexSearcher searcher = new IndexSearcher(indexReader);
127    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
128    
129    CategoryPath tags = new CategoryPath("tags");
130    CategoryPath genre = new CategoryPath("genre");
131    FacetSearchParams fsp = new FacetSearchParams(new SumIntAssociationFacetRequest(tags, 10), 
132        new SumFloatAssociationFacetRequest(genre, 10));
133    FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
134    
135    // MatchAllDocsQuery is for "browsing" (counts facets
136    // for all non-deleted docs in the index); normally
137    // you'd use a "normal" query, and use MultiCollector to
138    // wrap collecting the "normal" hits and also facets:
139    searcher.search(new MatchAllDocsQuery(), fc);
140    
141    // Retrieve results
142    List<FacetResult> facetResults = fc.getFacetResults();
143    
144    indexReader.close();
145    taxoReader.close();
146    
147    return facetResults;
148  }
149  
150  /** Runs summing association example. */
151  public List<FacetResult> runSumAssociations() throws IOException {
152    index();
153    return sumAssociations();
154  }
155  
156  /** Runs the sum int/float associations examples and prints the results. */
157  public static void main(String[] args) throws Exception {
158    System.out.println("Sum associations example:");
159    System.out.println("-------------------------");
160    List<FacetResult> results = new AssociationsFacetsExample().runSumAssociations();
161    for (FacetResult res : results) {
162      System.out.println(res);
163    }
164  }
165  
166}