1.引入jsoup
<dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.8.3</version>
</dependency>
2. html示例
示例代码:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>个人信用报告</title><style media=print>.Noprint { display: none; } </style><style type="text/css">body{ background: #FFFFFF; } table { border-collapse: collapse; border: 0; width: 70%; } td { border: #000000 solid 1px; padding: 4px; font-size: 13px; text-align: center; } .thead { background: #FFFFFF; font-weight: bold; } a{ position: relative; z-index: 1; } #side-nav { position: fixed; _position: absolute; top: 10px; right: 10px; border: 1px black solid; width: 14%; z-index: 1; } .nav-style { border-style: none; font-size: 13px; font-weight: bold; background: #FFFFFF; } .report-title { border-style: none; font-size: 24px; font-weight: bold; text-align: center; } .title-style1 { border-style: none; font-size: 21px; font-weight: bold; text-align: center; } .title-style2 { border-style: none; font-size: 15px; font-weight: bold; text-align: center; } .title-style3 { font-size: 13px; font-weight: bold; text-align: center; } .td-no-border { border-style: none; } .table-tips { font-style: italic; font-size: 10px; text-align: left; } .td-no-report { border: #FFF solid 0px; font-size: 22px; font-weight: bold; } .watermark { position: fixed; background: no-repeat center; transform: rotate(-30deg); filter: alpha(opacity=20); opacity: .20; } </style>
</head><body><div id="side-nav" class="Noprint"><table align="center"><tr><td class="nav-style" align="center"><a href="#Header">报告头</a></td>
</tr><tr><td class="nav-style" align="center"><a href="#PersonalInfo">个人基本信息</a></td>
</tr><tr><td class="nav-style" align="center"><a href="#InfoSummary">信息概要</a></td>
</tr><tr><td class="nav-style" align="center"><a href="#CreditDetail">信贷交易信息明细</a></td>
</tr><tr><td class="nav-style" align="center"><a href="#QueryRecord">查询记录</a></td>
</tr>
</table>
</div><div align='center' class='watermark' style='top:15%;'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:15%;left:33%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:15%;left:66%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:50%;'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:50%;left:33%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:50%;left:66%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:85%;'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:85%;left:33%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div align='center' class='watermark' style='top:85%;left:66%'><font size='5px' color='gray'>test99</font><br/><font size='5px' color='gray'>1</font><font size='5px' color='gray'>20231030155315</font>
</div><div><div class="report-title" style="margin-top: 15px;">个人信用报告</div><div class="title-style2" style="margin-top: 8px;">(授信机构版)</div>
</div><div id="Header"><table align="center"><tr><td class="td-no-border" style="text-align:left">报告编号:2023100117121647378010</td><td class="td-no-border" style="text-align:right">报告时间:2023-10-23 17:18:16</td>
</tr>
</table><table align="center"><tr class="thead"><td>被查询者姓名</td><td>被查询者证件类型</td><td>被查询者证件号码</td><td>查询机构</td><td>查询原因</td>
</tr><tr><td>杨晴晴</td><td>身份证</td><td>32082719901102604X</td><td>1 </td><td>贷后管理</td>
</tr>
</table><br/></div><div id="PersonalInfo"><div class="title-style1">一 个人基本信息</div><br/><div class="title-style2">(一)身份信息</div><table align="center"><tr class="thead"><td>性别</td><td>出生日期</td><td>婚姻状况</td><td>学历</td><td>学位</td><td>就业状况</td><td>国籍</td><td>电子邮箱</td>
</tr><tr><td>男</td><td>1980-06-30</td><td>--</td><td>初中及以下</td><td>--</td><td>在职</td><td>中国</td><td>1309867680@qq.com</td>
</tr><tr class="thead"><td colspan="5">通讯地址</td><td colspan="3">户籍地址</td>
</tr><tr><td colspan="5">福建省福州市平潭县苏澳镇斗魁村斗魁58号</td><td colspan="3">--</td>
</tr><tr class="thead"><td>编号</td><td colspan="4">手机号码</td><td colspan="3">信息更新日期</td>
</tr><tr><td>1</td><td colspan="4">18855583001</td><td colspan="3">2019-07-18</td>
</tr><tr><td>2</td><td colspan="4">18855583002</td><td colspan="3">2019-06-25</td>
</tr><tr><td>3</td><td colspan="4">18855583003</td><td colspan="3">2019-04-26</td>
</tr><tr><td>4</td><td colspan="4">18855583004</td><td colspan="3">2019-01-26</td>
</tr><tr><td>5</td><td colspan="4">15105055999</td><td colspan="3">2018-11-14</td>
</tr><tr><td>6</td><td colspan="4">13023801517</td><td colspan="3">2018-09-11</td>
</tr><tr><td>7</td><td colspan="4">18250182015</td><td colspan="3">2017-09-07</td>
</tr><tr><td>8</td><td colspan="4">015105055999</td><td colspan="3">2017-06-29</td>
</tr>
</table><br/><div class="title-style2">(二)居住信息</div><table align="center"><tr class="thead"><td>编号</td><td>居住地址</td><td>住宅电话</td><td>居住状况</td><td>信息更新日期</td>
</tr><tr><td>1</td><td>地址1</td><td>--</td><td>未知</td><td>2019-07-18</td>
</tr><tr><td>2</td><td>地址2</td><td>--</td><td>未知</td><td>2019-06-25</td>
</tr><tr><td>3</td><td>地址3</td><td>--</td><td>未知</td><td>2019-04-26</td>
</tr><tr><td>4</td><td>地址4</td><td>--</td><td>未知</td><td>2019-01-26</td>
</tr><tr><td>5</td><td>--</td><td>--</td><td>未知</td><td>2018-11-14</td>
</tr><tr><td>6</td><td>福建省福州市平潭县苏澳镇斗魁村斗魁58号</td><td>--</td><td>未知</td><td>2018-10-18</td>
</tr><tr><td>7</td><td>福建省福州市平潭县苏沃镇斗魁村斗魁58号</td><td>0591-23106098</td><td>其他</td><td>2018-06-09</td>
</tr><tr><td>8</td><td>福州市</td><td>--</td><td>未知</td><td>2018-05-24</td>
</tr><tr><td>9</td><td>福建省福州市仓山区金山融信西班牙8—102</td><td>--</td><td>亲属楼宇</td><td>2017-09-25</td>
</tr>
</table><br/></div><br/><div align="center" class="Noprint"><hr align="center" width="70%" size="1" noshade=""/><input type="button" value="打印信用报告" onclick="javascript:printPage();"/><br/></div><br/><br/><script type="text/javascript"> window.onload=function() { //禁止复制 document.oncopy = function () { return false; }; //禁止粘贴 document.onpaste = function () { return false; }; //禁止剪切 document.oncut = function () { return false; }; //禁止右键菜单 document.oncontextmenu = function () { return false; }; //禁止文本选择 document.onselectstart = function () { return false; }; };</script></body>
</html>
3.java代码实现
比如说我们想删掉某一个div 可以根据div的id class等进行操作处理
public static void main(String[] args) throws IOException {String filePath = "/Users/xxxx/Downloads/a.html";File file = new File(filePath);// 解析HTML文本Document document = Jsoup.parse(file, "UTF-8");// 选择要移除的div元素(可以根据id、class或其他属性来选择)Elements divsToRemove = document.select("div#Header");Element table = divsToRemove.select("table").get(1);if (table != null) {Element rowToDelete = table.select("tr").get(0); // 获取第一行(索引从0开始)Element rowToDelete2 = table.select("tr").get(1); // 获取第二行(索引从0开始)// 删除表格行rowToDelete.remove();rowToDelete2.remove();}Elements divsToRemove2 = document.select("div#PersonalInfo");divsToRemove2.remove();// 输出更新后的HTML文本String updatedHtml = document.outerHtml();System.out.println(updatedHtml);}
根据class删除代码示例:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;public class RemoveDivsByClass {public static void main(String[] args) {// 输入的HTML文本String htmlText = "<html><body><div id=\"content\"><div class=\"delete-me\"><p>子元素1</p></div><div class=\"keep-me\"><p>子元素2</p></div><div class=\"delete-me\"><p>子元素3</p></div></div></body></html>";// 解析HTML文本Document document = Jsoup.parse(htmlText);// 查找特定的<div>元素(这里使用id="content"作为示例)Element divElement = document.select("div#content").first();if (divElement != null) {// 查找包含特定class属性值的子<div>元素并删除Elements divsToDelete = divElement.select("div.delete-me");divsToDelete.remove();}// 输出更新后的HTML文本String updatedHtml = document.outerHtml();System.out.println(updatedHtml);}
}