搜索引擎技术系列教材（六）- lucene

-->

下载区
文件名	文件大小
lucene.rar	14m
解压rar如果失败，请用5.21版本或者更高版本的winrar 点击下载 winrar5.21

工具版本兼容问题

步骤 1 : 索引删除和更新
步骤 2 : 先说没有删除前的情形
步骤 3 : 删除索引
步骤 4 : 更多删除
步骤 5 : 更新索引

步骤 1 :

索引删除和更新

edit 顶折

纠问

索引建立好了之后，还是需要维护的，比如新增、删除和更新。新增就是建立索引的过程，这里就不表了，本教材主要讲索引的删除和更新。
索引里的数据，其实就是一个一个的 Document 对象，那么本文就是介绍如何删除和更新这些 Document 对象。

步骤 2 :

先说没有删除前的情形

edit 顶折

纠问

直接使用14万条数据里的代码，不过使用不一样的查询语句。
如图所示，通过关键字 “鞭” 可以查询到一条id是51173的数据。

package com.how2java; import java.io.IOException; import java.io.StringReader; import java.util.List; import java.util.Scanner; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 Scanner s = new Scanner(System.in); while(true){ System.out.print("请输入查询关键字："); String keyword = s.nextLine(); System.out.println("当前关键字是："+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int numberPerPage = 10; ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs; // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 
关闭查询 reader.close(); } } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("", ""); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println(" "); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per); } } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new 
TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); } }

代码行数较多，请点击查看

package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TestLucene {

	public static void main(String[] args) throws Exception {
		// 1. 准备中文分词器
		IKAnalyzer analyzer = new IKAnalyzer();
		// 2. 索引
		Directory index = createIndex(analyzer);

		// 3. 查询器
		
        Scanner s = new Scanner(System.in);
        
        while(true){
        	System.out.print("请输入查询关键字：");
            String keyword = s.nextLine();
            System.out.println("当前关键字是："+keyword);
    		Query query = new QueryParser( "name", analyzer).parse(keyword);

    		// 4. 搜索
    		IndexReader reader = DirectoryReader.open(index);
    		IndexSearcher searcher=new IndexSearcher(reader);
    		int numberPerPage = 10;
    		ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;
    		
    		// 5. 显示查询结果
    		showSearchResults(searcher, hits,query,analyzer);
    		// 6. 关闭查询
    		reader.close();
        }
		
	}

	private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
		System.out.println("找到 " + hits.length + " 个命中.");

        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
		for (int i = 0; i < hits.length; ++i) {
			ScoreDoc scoreDoc= hits[i];
			int docId = scoreDoc.doc;
			Document d = searcher.doc(docId);
			List<IndexableField> fields= d.getFields();
			System.out.print((i + 1) );
			System.out.print("\t" + scoreDoc.score);
			for (IndexableField f : fields) {

				if("name".equals(f.name())){
		            TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
		            String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
					System.out.print("\t"+fieldContent);
				}
				else{
					System.out.print("\t"+d.get(f.name()));
				}
			}
			System.out.println("<br>");
		}
	}

	private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
		Directory index = new RAMDirectory();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		IndexWriter writer = new IndexWriter(index, config);
		String fileName = "140k_products.txt";
		List<Product> products = ProductUtil.file2list(fileName);
		int total = products.size();
		int count = 0;
		int per = 0;
		int oldPer =0;
		for (Product p : products) {
			addDoc(writer, p);
			count++;
			per = count*100/total;
			if(per!=oldPer){
				oldPer = per;
				System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per);
			}
			
		}
		writer.close();
		return index;
	}

	private static void addDoc(IndexWriter w, Product p) throws IOException {
		Document doc = new Document();
		doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
		doc.add(new TextField("name", p.getName(), Field.Store.YES));
		doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
		doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
		doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
		doc.add(new TextField("code", p.getCode(), Field.Store.YES));
		w.addDocument(doc);
	}
}

步骤 3 :

删除索引

edit 顶折

纠问

删除id=51173的Document之后，如图所示，再搜索鞭字，就查询不到结果了。
删除关键代码如下，通过 Term对象删除

// Delete the document whose id is 51173.
// try-with-resources closes the writer (releasing the index write lock)
// even if the delete or the commit throws.
try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
    indexWriter.deleteDocuments(new Term("id", "51173"));
    // Commit so that readers opened afterwards no longer see the document.
    indexWriter.commit();
}

代码片段
TestLucene.java

代码行数较多，请点击查看

        // Delete the document whose id is 51173.
        // try-with-resources closes the writer (releasing the index write lock)
        // even if the delete or the commit throws.
        try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
            indexWriter.deleteDocuments(new Term("id", "51173"));
            // Commit so that readers opened afterwards no longer see the document.
            indexWriter.commit();
        }

package com.how2java; import java.io.IOException; import java.io.StringReader; import java.util.List; import java.util.Scanner; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 Scanner s = new Scanner(System.in); //删除id=51173的数据 IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(index, config); indexWriter.deleteDocuments(new Term("id", "51173")); indexWriter.commit(); indexWriter.close(); while(true){ System.out.print("请输入查询关键字："); String keyword = s.nextLine(); System.out.println("当前关键字是："+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int numberPerPage = 10; ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs; // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 
关闭查询 reader.close(); } } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("", ""); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println(" "); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per); } } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new 
TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); } }

代码行数较多，请点击查看

package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TestLucene {

	public static void main(String[] args) throws Exception {
		// 1. 准备中文分词器
		IKAnalyzer analyzer = new IKAnalyzer();
		// 2. 索引
		Directory index = createIndex(analyzer);

		// 3. 查询器
		
        Scanner s = new Scanner(System.in);
        
        //删除id=51173的数据
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter indexWriter = new IndexWriter(index, config);
        indexWriter.deleteDocuments(new Term("id", "51173"));
        indexWriter.commit();
        indexWriter.close();
        
        while(true){
        	System.out.print("请输入查询关键字：");
            String keyword = s.nextLine();
            System.out.println("当前关键字是："+keyword);
    		Query query = new QueryParser( "name", analyzer).parse(keyword);

    		// 4. 搜索
    		IndexReader reader = DirectoryReader.open(index);
    		IndexSearcher searcher=new IndexSearcher(reader);
    		int numberPerPage = 10;
    		ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;
    		
    		// 5. 显示查询结果
    		showSearchResults(searcher, hits,query,analyzer);
    		// 6. 关闭查询
    		reader.close();
        }
		
	}

	private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
		System.out.println("找到 " + hits.length + " 个命中.");

        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
		for (int i = 0; i < hits.length; ++i) {
			ScoreDoc scoreDoc= hits[i];
			int docId = scoreDoc.doc;
			Document d = searcher.doc(docId);
			List<IndexableField> fields= d.getFields();
			System.out.print((i + 1) );
			System.out.print("\t" + scoreDoc.score);
			for (IndexableField f : fields) {

				if("name".equals(f.name())){
		            TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
		            String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
					System.out.print("\t"+fieldContent);
				}
				else{
					System.out.print("\t"+d.get(f.name()));
				}
			}
			System.out.println("<br>");
		}
	}

	private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
		Directory index = new RAMDirectory();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		IndexWriter writer = new IndexWriter(index, config);
		String fileName = "140k_products.txt";
		List<Product> products = ProductUtil.file2list(fileName);
		int total = products.size();
		int count = 0;
		int per = 0;
		int oldPer =0;
		for (Product p : products) {
			addDoc(writer, p);
			count++;
			per = count*100/total;
			if(per!=oldPer){
				oldPer = per;
				System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per);
			}
			
		}
		writer.close();
		return index;
	}

	private static void addDoc(IndexWriter w, Product p) throws IOException {
		Document doc = new Document();
		doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
		doc.add(new TextField("name", p.getName(), Field.Store.YES));
		doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
		doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
		doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
		doc.add(new TextField("code", p.getCode(), Field.Store.YES));
		w.addDocument(doc);
	}
}

步骤 5 :

更新索引

edit 顶折

纠问

如图所示，更新索引后，再用鞭查询，得到的结果是查出了更新之后的数据。更新的关键代码：

// Update the index: replace the document whose id is 51173.
Document doc = new Document();
doc.add(new TextField("id", "51173", Field.Store.YES));
doc.add(new TextField("name", "神鞭，鞭没了，神还在", Field.Store.YES));
doc.add(new TextField("category", "道具", Field.Store.YES));
doc.add(new TextField("price", "998", Field.Store.YES));
doc.add(new TextField("place", "南海群岛", Field.Store.YES));
doc.add(new TextField("code", "888888", Field.Store.YES));

// try-with-resources closes the writer (releasing the index write lock)
// even if the update or the commit throws.
try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
    // updateDocument deletes every document matching the term, then adds doc.
    indexWriter.updateDocument(new Term("id", "51173"), doc);
    // Commit so that readers opened afterwards see the replacement.
    indexWriter.commit();
}

package com.how2java; import java.io.IOException; import java.io.StringReader; import java.util.List; import java.util.Scanner; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 
查询器 // 更新索引 IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(index, config); Document doc = new Document(); doc.add(new TextField("id", "51173", Field.Store.YES)); doc.add(new TextField("name", "神鞭，鞭没了，神还在", Field.Store.YES)); doc.add(new TextField("category", "道具", Field.Store.YES)); doc.add(new TextField("price", "998", Field.Store.YES)); doc.add(new TextField("place", "南海群岛", Field.Store.YES)); doc.add(new TextField("code", "888888", Field.Store.YES)); indexWriter.updateDocument(new Term("id", "51173"), doc ); indexWriter.commit(); indexWriter.close(); Scanner s = new Scanner(System.in); while(true){ System.out.print("请输入查询关键字："); String keyword = s.nextLine(); System.out.println("当前关键字是："+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int numberPerPage = 10; ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs; // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 
关闭查询 reader.close(); } } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("", ""); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println(" "); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per); } } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new 
TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); } }

代码行数较多，请点击查看

package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TestLucene {

	public static void main(String[] args) throws Exception {
		// 1. 准备中文分词器
		IKAnalyzer analyzer = new IKAnalyzer();
		// 2. 索引
		Directory index = createIndex(analyzer);

		// 3. 查询器
		
		// 更新索引
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter indexWriter = new IndexWriter(index, config);
		Document doc = new Document();
		doc.add(new TextField("id", "51173", Field.Store.YES));
		doc.add(new TextField("name", "神鞭，鞭没了，神还在", Field.Store.YES));
		doc.add(new TextField("category", "道具", Field.Store.YES));
		doc.add(new TextField("price", "998", Field.Store.YES));
		doc.add(new TextField("place", "南海群岛", Field.Store.YES));
		doc.add(new TextField("code", "888888", Field.Store.YES));
		
		indexWriter.updateDocument(new Term("id", "51173"), doc );
		indexWriter.commit();
		indexWriter.close();
        
        Scanner s = new Scanner(System.in);
        while(true){
        	System.out.print("请输入查询关键字：");
            String keyword = s.nextLine();
            System.out.println("当前关键字是："+keyword);
    		Query query = new QueryParser( "name", analyzer).parse(keyword);

    		// 4. 搜索
    		IndexReader reader = DirectoryReader.open(index);
    		IndexSearcher searcher=new IndexSearcher(reader);
    		int numberPerPage = 10;
    		ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;
    		
    		// 5. 显示查询结果
    		showSearchResults(searcher, hits,query,analyzer);
    		// 6. 关闭查询
    		reader.close();
        }
		
	}

	private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
		System.out.println("找到 " + hits.length + " 个命中.");

        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
		for (int i = 0; i < hits.length; ++i) {
			ScoreDoc scoreDoc= hits[i];
			int docId = scoreDoc.doc;
			Document d = searcher.doc(docId);
			List<IndexableField> fields= d.getFields();
			System.out.print((i + 1) );
			System.out.print("\t" + scoreDoc.score);
			for (IndexableField f : fields) {

				if("name".equals(f.name())){
		            TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
		            String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
					System.out.print("\t"+fieldContent);
				}
				else{
					System.out.print("\t"+d.get(f.name()));
				}
			}
			System.out.println("<br>");
		}
	}

	private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
		Directory index = new RAMDirectory();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		IndexWriter writer = new IndexWriter(index, config);
		String fileName = "140k_products.txt";
		List<Product> products = ProductUtil.file2list(fileName);
		int total = products.size();
		int count = 0;
		int per = 0;
		int oldPer =0;
		for (Product p : products) {
			addDoc(writer, p);
			count++;
			per = count*100/total;
			if(per!=oldPer){
				oldPer = per;
				System.out.printf("索引中，总共要添加 %d 条记录，当前添加进度是： %d%% %n",total,per);
			}
			
		}
		writer.close();
		return index;
	}

	private static void addDoc(IndexWriter w, Product p) throws IOException {
		Document doc = new Document();
		doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
		doc.add(new TextField("name", p.getName(), Field.Store.YES));
		doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
		doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
		doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
		doc.add(new TextField("code", p.getCode(), Field.Store.YES));
		w.addDocument(doc);
	}
}

搜索引擎技术系列教材（五）- lucene - 分页查询

搜索引擎技术系列教材（七）- lucene - 进一步学习

HOW2J公众号，关注后实时获知最新的教程和优惠活动，谢谢。

问答区域

2021-05-12 更新问题

JAVA界的盖伦

关于工具和中间件-搜索引擎技术-索引删除和更新的提问

更新时，如果不知道id,怎么处理呢

2 个答案

萌森
答案时间：2021-11-25

new Term("id", "51173") 把知道的换上去，虽然这样更新不准确

AbCdEFf
答案时间：2021-09-11

盖伦！

回答已经提交成功，正在审核。请于我的回答处查看回答记录，谢谢

提问之前请登陆

提问已经提交成功，正在审核。请于我的提问处查看提问记录，谢谢

关于工具和中间件-搜索引擎技术-索引删除和更新的提问

尽量提供截图、代码和异常信息，有助于分析和解决问题。也可进本站QQ群交流: 578362961

提问尽量提供完整的代码，环境描述，越是有利于问题的重现，您的问题越能更快得到解答。
对教程中代码有疑问，请提供是哪个步骤，哪一行有疑问，这样便于快速定位问题，提高问题得到解答的速度
在已经存在的几千个提问里，有相当大的比例，是因为使用了和站长不同版本的开发环境导致的，比如 jdk, eclipse, idea, mysql, tomcat 等等软件的版本不一致。
请使用和站长一样的版本，可以节约自己大量的学习时间。站长把教学中用的软件版本整理了，都统一放在了这里，方便大家下载： https://how2j.cn/k/helloworld/helloworld-version/1718.html

上传截图