源码下载:链接:https://pan.baidu.com/s/1D3yszkTzjwQz0vFRozQl2g?pwd=z6kb
提取码:z6kb
实现思路
1.搭建一个新的springboot项目,不会的请看我这篇博客:springboot项目搭建
2.添加maven依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-core</artifactId>
    <version>1.27</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>7.10.0</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.10.0</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.6</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.83</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.18.20</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.25</version>
</dependency>
3.创建一个类,复制下面的代码,然后执行main方法
package com.demo.controller;import com.demo.bean.FileBean;
import org.apache.http.HttpHost;
import org.apache.tika.Tika;
import org.apache.tika.mime.MediaType;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import com.alibaba.fastjson.JSON;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;public class EsController {private static EsController FileToBase64;// 使用tika库自动获取文件类型public static String getFileTypeByDefaultTika(String filePathUrl) throws IOException, URISyntaxException {// 从 URL 创建一个 File 对象File file = new File(new URL("file:///" + filePathUrl).toURI());// 使用 Tika 来检测文件的 MIME 类型Tika tika = new Tika();MediaType mediaType = MediaType.parse(tika.detect(file));// 从 MIME 类型中提取文件的基本类型(如 pdf、image、video 等)String fileType = mediaType.getSubtype();return fileType;}// 转换文件为base64public static String fileToBase64(String filePath) throws IOException {byte[] fileContent = Files.readAllBytes(Paths.get(filePath));return Base64.getEncoder().encodeToString(fileContent);}// 根据文件类型判断排除音视频类文件public static String fileFilterate(String pathUrl) {try {String fileType = getFileTypeByDefaultTika(pathUrl);if (!fileType.contains("video")&& !fileType.contains("image")&& !"application/zip".equals(fileType)) {return fileToBase64(pathUrl);}return "";} catch (IOException e) {e.printStackTrace();return "";} catch (URISyntaxException e) {e.printStackTrace();return "";}}public static void main(String[] args) throws IOException {// 初始化RestHighLevelClient,localhost就是ES的ip地址,端口号为9200RestClientBuilder builder = RestClient.builder(new HttpHost("localhost", 9200, "http"));RestHighLevelClient client = new RestHighLevelClient(builder);//文件转成base,存入ES中String path = "C:\\Users\\83677\\Desktop\\测试4.docx";String file_base64 = FileToBase64.fileFilterate(path);//拿到base64,存入ES中FileBean filebean = new FileBean();filebean.setFile_id("1");filebean.setFile_name("测试4.docx");filebean.setFile_url("http://文件存储地址:8080/xxx/docs/raw/master/性能分析与内存问题排查思考.pdf");filebean.setFile_type("docx");filebean.setContent(file_base64);filebean.setFile_size("33");filebean.setFile_dir_name("yryy");filebean.setFile_suffix(".docx");filebean.setGroup_file_id("1234653");//把实体对象转为字符串String body = JSON.toJSONString(filebean);//file_data 是索引名称,这里就是插入数据到ES的核心部分IndexRequest indexRequest = new 
IndexRequest().index("file_data").source(body, XContentType.JSON)//请求参数,类型为JSON.setPipeline("attachment") //上传时使用attachment pipline进行提取文件.timeout(TimeValue.timeValueMinutes(10));client.index(indexRequest, RequestOptions.DEFAULT);// 关闭客户端client.close();}
}
4.实体类对象,我的示例
package com.demo.bean;

import lombok.Data;

/**
 * Plain data holder describing one file document.
 *
 * <p>Accessors, {@code equals}, {@code hashCode} and {@code toString} are generated by
 * Lombok's {@code @Data}. The snake_case field names are kept as-is because the generated
 * accessor names (e.g. {@code setFile_id}) derive from them.
 */
@Data
public class FileBean {

    /** Identifier of the file. */
    private String file_id;

    /** Name of the file, e.g. including its extension. */
    private String file_name;

    /** Address where the file is stored. */
    private String file_url;

    /** Type of the file. */
    private String file_type;

    /** File content as text; presumably the Base64 encoding of the file's bytes. */
    private String content;

    /** Identifier of the group the file belongs to (semantics not visible here). */
    private String group_file_id;

    /** File name suffix. */
    private String file_suffix;

    /** Size of the file, kept as text; the unit is not defined here. */
    private String file_size;

    /** Name of the directory that contains the file. */
    private String file_dir_name;
}