package com.onlydoit.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.youedata.bean.FileBean;
/**
 * Utility methods for building and maintaining Lucene indexes.
 *
 * <p>Every public indexing method opens its own {@link IndexWriter} on the target
 * directory, performs a single add/update/delete and closes the writer again. The
 * writer is rolled back when the operation fails, so the index write lock is not left
 * behind after an exception.
 *
 * @author youedata
 */
public class LuceneIndexUtils {

    /**
     * Regex character class listing the punctuation that is removed from file content
     * before it is indexed.
     *
     * NOTE(review): full-width parentheses are not in this set. The original code
     * stripped the ASCII parentheses twice, which may have been meant to cover the
     * full-width forms - confirm against the search side before adding them.
     */
    private static final String STRIPPED_PUNCTUATION = "[《》()\\[\\]【】〔〕”“\"]";

    /**
     * Creates the content index, or appends to it, from a single text file.
     *
     * <p>The file is read as UTF-8. The document id is the file name with every
     * occurrence of {@code ".txt"} removed, and the content is stored with the
     * punctuation in {@link #STRIPPED_PUNCTUATION} removed.
     *
     * @param txtPath path of the text file to index
     * @param ipath directory in which the index is stored
     * @param startRecPos start position; not used by this method
     * @param indexedRecNum value passed to {@link IndexWriterConfig#setMaxBufferedDocs(int)}
     * @throws IOException if {@code txtPath} could not be read as a regular file or the
     *         index could not be written
     * @throws Exception for any other failure while reading the file
     */
    public static void createLuIndexByTxt(String txtPath, String ipath, int startRecPos, int indexedRecNum) throws Exception {
        FileBean bean = readSingleFile(txtPath, "utf-8");
        String fileName = bean.getFileName();
        String rawContent = bean.getFileContent();
        // readSingleFile leaves the bean unpopulated when the path is not a regular
        // file; fail with a clear message before any index resource is opened.
        if (fileName == null || rawContent == null) {
            throw new IOException("Cannot index " + txtPath + ": not a readable regular file");
        }

        String id = fileName.replace(".txt", "");
        System.out.println(id);
        String content = rawContent.replaceAll(STRIPPED_PUNCTUATION, "");

        Document doc = new Document();
        addStoredString(doc, "id", id);
        // NOTE(review): StringField indexes the whole content as one untokenized term,
        // so the analyzer configured on the writer is not applied to it. Confirm that
        // the search side expects this before switching to a tokenized field.
        addStoredString(doc, "fileContent", content);

        writeDocument(ipath, indexedRecNum, null, doc);
        System.out.println("成功");
    }

    /**
     * Reads a text file into a {@link FileBean}.
     *
     * <p>When {@code path} is a regular file, the bean receives the file content, name
     * and path. Otherwise a freshly constructed bean is returned unchanged.
     *
     * @param path path of the file to read
     * @param encoding name of the charset used to decode the file
     * @return the populated bean, or an unpopulated one when {@code path} is not a file
     * @throws Exception if the file cannot be opened or read, or the charset is unsupported
     */
    public static FileBean readSingleFile(String path, String encoding) throws Exception {
        File file = new File(path);
        FileBean bean = new FileBean();
        if (!file.isFile()) {
            return bean;
        }

        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
            StringBuilder text = new StringBuilder();
            String line;
            // readLine() drops the line terminator, so consecutive lines are joined
            // without any separator (behavior kept from the original implementation).
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
            bean.setFileContent(text.toString());
            bean.setFileName(file.getName());
            bean.setFilePath(file.getPath());
        } finally {
            // Closing the underlying stream releases the file handle even when
            // decoding or reading failed part-way through.
            in.close();
        }
        return bean;
    }

    /**
     * Deletes every document whose {@code id} field equals the given policy id and
     * prints the elapsed time.
     *
     * @param policyId value of the {@code id} field of the documents to delete
     * @param INDEX_DIR directory in which the index is stored
     * @throws Exception if the index cannot be opened or written
     */
    public static void deletePolicyIndex(String policyId,String INDEX_DIR) throws Exception {
        long startedAt = System.currentTimeMillis();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, config);
            boolean committed = false;
            try {
                indexWriter.deleteDocuments(new Term("id", policyId));
                indexWriter.close();
                committed = true;
            } finally {
                if (!committed) {
                    rollbackQuietly(indexWriter);
                }
            }
        } finally {
            directory.close();
        }
        long finishedAt = System.currentTimeMillis();
        System.out.println("删除索引耗时:" + (finishedAt - startedAt) + "ms\n");
    }

    /**
     * Replaces the indexed policy document whose {@code id} field equals
     * {@code temp.getId()} with a document rebuilt from the bean.
     *
     * @param temp bean holding the policy field values
     * @param ipath directory in which the index is stored
     * @param startRecPos start position; not used by this method
     * @param indexedRecNum value passed to {@link IndexWriterConfig#setMaxBufferedDocs(int)}
     * @throws IOException if the index cannot be opened or written
     * @throws NumberFormatException if {@code temp.getPubDateFors()} is not an integer
     */
    public static void updatePolicyIndex(FileBean temp,String ipath, int startRecPos,int indexedRecNum) throws IOException{
        // Build the document first so that bad bean data fails before the index is locked.
        Document doc = buildPolicyDocument(temp);
        writeDocument(ipath, indexedRecNum, new Term("id", temp.getId()), doc);
        System.out.println("更新索引成功");
    }

    /**
     * Creates the title index, or appends to it, with one document built from the bean.
     *
     * @param temp bean holding the policy field values
     * @param ipath directory in which the index is stored
     * @param startRecPos start position; not used by this method
     * @param indexedRecNum value passed to {@link IndexWriterConfig#setMaxBufferedDocs(int)}
     * @throws Exception if the index cannot be opened or written, or the bean data is invalid
     */
    public static void createTitleIndexByBean(FileBean temp,String ipath, int startRecPos,int indexedRecNum) throws Exception {
        // Build the document first so that bad bean data fails before the index is locked.
        Document doc = buildPolicyDocument(temp);
        writeDocument(ipath, indexedRecNum, null, doc);
        System.out.println("创建标题索引成功");
    }

    /**
     * Computes the weight stored with a policy document.
     *
     * <p>The result is {@code pubDateFors} when both flags equal {@code "否"} ("no"),
     * {@code 4 * pubDateFors} when neither does, and {@code 2 * pubDateFors} when
     * exactly one does.
     *
     * @param policyPic first flag; must not be {@code null}
     * @param policyRead second flag; must not be {@code null}
     * @param pubDateFors base value that is scaled
     * @return the scaled value
     * @throws NullPointerException if either flag is {@code null}
     */
    public static int getWeight(String policyPic, String policyRead,int pubDateFors) {
        boolean picSet = !policyPic.equals("否");
        boolean readSet = !policyRead.equals("否");
        if (picSet && readSet) {
            return 4 * pubDateFors;
        }
        if (picSet || readSet) {
            return 2 * pubDateFors;
        }
        return pubDateFors;
    }

    /** Builds the stored policy document shared by the update and title-index paths. */
    private static Document buildPolicyDocument(FileBean temp) {
        int pubDate = Integer.parseInt(temp.getPubDateFors());
        int weight = getWeight(temp.getPolicyPic(), temp.getPolicyRead(), pubDate);

        Document doc = new Document();
        addStoredString(doc, "id", temp.getId());
        addStoredString(doc, "unitId", temp.getUnitId());
        addStoredString(doc, "unitName", temp.getUnitName());
        addStoredString(doc, "unitAllName", temp.getUnitAllName());
        addStoredString(doc, "unitType", temp.getUnitType());
        addStoredString(doc, "unitTypeName", temp.getUnitTypeName());
        addStoredString(doc, "themeId", temp.getThemeId());
        addStoredString(doc, "themeName", temp.getThemeName());
        doc.add(new IntField("pubDateFors", pubDate, Field.Store.YES));
        addStoredString(doc, "pubDateFord", temp.getPubDateFord());
        addStoredString(doc, "fileName", temp.getFileName());
        addStoredString(doc, "fileNo", temp.getFileNo());
        addStoredString(doc, "policyPic", temp.getPolicyPic());
        addStoredString(doc, "policyRead", temp.getPolicyRead());
        addStoredString(doc, "belongUnit", temp.getBelongUnit());
        doc.add(new IntField("weight", weight, Field.Store.YES));
        // Fields added in a later revision of the index schema.
        addStoredString(doc, "interviewOnline", temp.getInterviewOnline());
        addStoredString(doc, "ecologicalKeyWordsId", temp.getEcologicalKeyWordsId());
        addStoredString(doc, "ecologicalKeyWords", temp.getEcologicalKeyWords());
        addStoredString(doc, "timeStageId", temp.getTimeStageId());
        addStoredString(doc, "timeStage", temp.getTimeStage());
        return doc;
    }

    /** Adds a stored {@link StringField} with the given name and value to the document. */
    private static void addStoredString(Document doc, String name, String value) {
        doc.add(new StringField(name, value, Field.Store.YES));
    }

    /**
     * Opens the index at {@code ipath} in create-or-append mode, writes one document and
     * closes the index. When {@code replaceTerm} is {@code null} the document is added;
     * otherwise it replaces the documents matching that term.
     */
    private static void writeDocument(String ipath, int indexedRecNum, Term replaceTerm, Document doc) throws IOException {
        Directory directory = FSDirectory.open(new File(ipath));
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new IKAnalyzer());
            config.setMaxBufferedDocs(indexedRecNum);
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(directory, config);
            boolean committed = false;
            try {
                if (replaceTerm == null) {
                    writer.addDocument(doc);
                } else {
                    writer.updateDocument(replaceTerm, doc);
                }
                writer.close();
                committed = true;
            } finally {
                if (!committed) {
                    rollbackQuietly(writer);
                }
            }
        } finally {
            directory.close();
        }
    }

    /** Rolls the writer back after a failure without masking the exception in flight. */
    private static void rollbackQuietly(IndexWriter writer) {
        try {
            writer.rollback();
        } catch (IOException ignored) {
            // The failure that triggered the rollback is already propagating and is
            // the more useful one to report.
        }
    }
}