Selenium是一个用于Web应用程序测试的工具,但是也可以爬取页面中的数据。
开发环境是内网(局域网),项目工程是web项目,JDK使用的是1.8,Tomcat使用的是8。
web项目: 启动时使用tomcat, tomcat会先加载web.xml配置文件里内容;
maven项目: 和web项目的区别是,需要的jar包,交给maven维护,不用自己导入jar包;
java项目: 启动时,使用main方法,没有web.xml配置文件;
一、创建项目
web.xml:web项目的配置文件,随着tomcat启动而加载;
TimerConfig.xml:Spring定时任务(task)配置文件,由web.xml中的contextConfigLocation指定加载;
proxool.xml: 配置数据库连接池;
log4j.properties:log4j日志配置文件;
hibernate.cfg.xml: hibernate配置文件;
LoadsRealTimeTask: 定时任务类;
二、环境准备
1、导入selenium所需要的包
2、导入项目所需要的包
3、安装chromedriver.exe
因为我使用谷歌浏览器来打开页面,所以需要将chromedriver.exe放到Chrome的安装目录下(代码中通过webdriver.chrome.driver属性指向该文件的完整路径)。
三、编写代码
web.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Deployment descriptor for the GDreptile web application (Servlet 3.0).
  It bootstraps the Spring context that holds the scheduled crawler tasks
  and initialises the Proxool connection pool at startup.
-->
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xmlns="http://java.sun.com/xml/ns/javaee"
         xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd"
         id="WebApp_ID"
         version="3.0">

  <display-name>GDreptile</display-name>

  <welcome-file-list>
    <welcome-file>index.html</welcome-file>
    <welcome-file>index.htm</welcome-file>
    <welcome-file>index.jsp</welcome-file>
    <welcome-file>default.html</welcome-file>
    <welcome-file>default.htm</welcome-file>
    <welcome-file>default.jsp</welcome-file>
  </welcome-file-list>

  <!-- Scheduler (timer) configuration file loaded by the Spring context listener -->
  <context-param>
    <param-name>contextConfigLocation</param-name>
    <param-value>/WEB-INF/TimerConfig.xml</param-value>
  </context-param>
  <listener>
    <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
  </listener>

  <!-- Proxool: configures the connection pool from /WEB-INF/proxool.xml at startup -->
  <servlet>
    <servlet-name>ServletConfigurator</servlet-name>
    <servlet-class>org.logicalcobwebs.proxool.configuration.ServletConfigurator</servlet-class>
    <init-param>
      <param-name>xmlFile</param-name>
      <param-value>/WEB-INF/proxool.xml</param-value>
    </init-param>
    <load-on-startup>1</load-on-startup>
  </servlet>

  <!--
    Admin/monitoring tool shipped with Proxool; shows the current state of the
    database connections. If it fails to run, remove this servlet and its mapping.
    NOTE(review): /admin is mapped without any security constraint; confirm that
    this is acceptable for the deployment network.
  -->
  <servlet>
    <servlet-name>Admin</servlet-name>
    <servlet-class>org.logicalcobwebs.proxool.admin.servlet.AdminServlet</servlet-class>
  </servlet>
  <servlet-mapping>
    <servlet-name>Admin</servlet-name>
    <url-pattern>/admin</url-pattern>
  </servlet-mapping>

</web-app>
TimerConfig.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring context for the scheduled crawler jobs. Each namespace URI in
  xsi:schemaLocation must be followed by whitespace and then its schema
  location; the pairs themselves are whitespace-separated as well.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:mvc="http://www.springframework.org/schema/mvc"
       xmlns:tx="http://www.springframework.org/schema/tx"
       xmlns:task="http://www.springframework.org/schema/task"
       xsi:schemaLocation="
         http://www.springframework.org/schema/beans
         http://www.springframework.org/schema/beans/spring-beans-3.1.xsd
         http://www.springframework.org/schema/context
         http://www.springframework.org/schema/context/spring-context-3.1.xsd
         http://www.springframework.org/schema/tx
         http://www.springframework.org/schema/tx/spring-tx-3.1.xsd
         http://www.springframework.org/schema/task
         http://www.springframework.org/schema/task/spring-task-3.1.xsd">

  <!-- Switch on Spring's annotation-driven scheduling support -->
  <task:annotation-driven />

  <!-- Job beans; their run() methods are referenced by the schedule below -->
  <bean id="historyTask" class="com.sgcc.gridDispa.LoadsHistoryTask"></bean>
  <bean id="realTimeTask" class="com.sgcc.gridDispa.LoadsRealTimeTask"></bean>

  <task:scheduled-tasks>
    <!-- Runs once a day at 07:30 -->
    <task:scheduled ref="historyTask" method="run" cron="0 30 7 * * ?" />
    <!-- Runs once an hour, at minute 20 -->
    <task:scheduled ref="realTimeTask" method="run" cron="0 20 0-23 * * ?" />
  </task:scheduled-tasks>

</beans>
proxool.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Proxool connection pool definition, loaded by the ServletConfigurator
  servlet declared in web.xml.
  NOTE(review): the root element name looks like a placeholder; presumably
  Proxool only reads the nested <proxool> elements. Confirm before renaming.
-->
<something-else-entirely>
  <proxool>
    <!-- Pool alias; hibernate.cfg.xml refers to the pool by this name -->
    <alias>proxoolpool</alias>
    <driver-url>jdbc:oracle:thin:@XX.XX.XX.XX:1521/XXXXX</driver-url>
    <driver-class>oracle.jdbc.driver.OracleDriver</driver-class>
    <driver-properties>
      <property name="user" value="XXXXX" />
      <property name="password" value="XXXXX" />
    </driver-properties>

    <!-- Pool sizing -->
    <maximum-connection-count>200</maximum-connection-count>
    <minimum-connection-count>10</minimum-connection-count>
    <house-keeping-sleep-time>30000</house-keeping-sleep-time>
    <maximum-new-connections>10</maximum-new-connections>
    <prototype-count>5</prototype-count>

    <!-- Connection validation -->
    <test-before-use>true</test-before-use>
    <house-keeping-test-sql>select sysdate from dual</house-keeping-test-sql>
  </proxool>
</something-else-entirely>
log4j.properties
# Root logger: DEBUG level, routed to the "console" and "FILE" appenders.
# Every property must be on its own line; otherwise the following key is
# swallowed into the previous value and the appender class is never set.
log4j.rootLogger=DEBUG,console,FILE

# Console appender (INFO and above)
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.threshold=INFO
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss} [%5p] - %c -%F(%L) -%m%n

# Rolling file appender (INFO and above, rolls over at 10MB)
log4j.appender.FILE=org.apache.log4j.RollingFileAppender
log4j.appender.FILE.Append=true
log4j.appender.FILE.File=D:/log/pachong/logs
log4j.appender.FILE.Threshold=INFO
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss} [%5p] - %c -%F(%L) -%m%n
log4j.appender.FILE.MaxFileSize=10MB
hibernate.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE hibernate-configuration PUBLIC
    "-//Hibernate/Hibernate Configuration DTD 3.0//EN"
    "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd">
<hibernate-configuration>
  <session-factory>

    <!-- Hibernate's own settings -->
    <property name="dialect">com.sgcc.gridDispa.utils.BlobOracleDialect</property>
    <!-- <property name="hbm2ddl.auto">update</property> -->
    <property name="hibernate.jdbc.batch_size">50</property>
    <!-- Connection count (disabled; pooling is delegated to Proxool below) -->
    <!-- <property name="connection.pool_size">60</property> -->
    <property name="show_sql">false</property>
    <property name="format_sql">false</property>
    <property name="current_session_context_class">thread</property>
    <!-- Release the connection once the transaction has been committed -->
    <property name="connection.release_mode">after_transaction</property>
    <!-- Close the session once the transaction has been committed -->
    <property name="transaction.auto_close_session">true</property>
    <!-- Statistics -->
    <property name="hibernate.generate_statistics">true</property>

    <!-- Proxool connection pool (alias must match proxool.xml) -->
    <property name="hibernate.proxool.pool_alias">proxoolpool</property>
    <property name="hibernate.proxool.xml">proxool.xml</property>
    <property name="hibernate.connection.provider_class">org.hibernate.connection.ProxoolConnectionProvider</property>
    <property name="hibernate.proxool.existing_pool">true</property>

    <!-- Registered mapping files -->
    <mapping resource="com/sgcc/gridDispa/po/LoadsHistory.hbm.xml" />
    <mapping resource="com/sgcc/gridDispa/po/LoadsToday.hbm.xml" />
    <mapping resource="com/sgcc/gridDispa/po/TgridLoads.hbm.xml" />
    <mapping resource="com/sgcc/gridDispa/po/LoadsRealTime.hbm.xml" />

  </session-factory>
</hibernate-configuration>
LoadsRealTimeTask
package com.sgcc.gridDispa;

import java.util.TimerTask;

import com.sgcc.gridDispa.impl.LoadsHistoryImpl;
import com.sgcc.gridDispa.impl.LoadsRealTimeImpl;

/**
 * Scheduled job that triggers the real-time load import.
 *
 * <p>The bean is registered as {@code realTimeTask} in TimerConfig.xml and its
 * {@link #run()} method is invoked according to the cron expression declared there.
 */
public class LoadsRealTimeTask extends TimerTask {

    /** The actual import logic; it is a {@link Runnable} and is executed on its own thread. */
    LoadsRealTimeImpl tsk = new LoadsRealTimeImpl();

    /**
     * Starts the import on a new thread so that the calling scheduler thread
     * returns immediately instead of waiting for the crawl to finish.
     */
    @Override
    public void run() {
        try {
            Thread thread = new Thread(tsk);
            thread.start();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
utils文件夹下
WebDriverUtil.java
package com.sgcc.gridDispa.utils;

import java.util.concurrent.TimeUnit;

import org.openqa.selenium.Dimension;
import org.openqa.selenium.Platform;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.remote.CapabilityType;

/**
 * Factory helper for the Selenium browser instances used by the crawler.
 *
 * Description: crawler
 * Author: old
 * Created: 2017-11-15 15:16:16
 */
public class WebDriverUtil {

    /**
     * Creates a Chrome-backed {@link WebDriver}.
     *
     * <p>The driver gets a page-load timeout of 1200 seconds and a window size
     * of 1024x768 before it is returned.
     *
     * @param path file system path of the chromedriver executable; it is
     *             published through the {@code webdriver.chrome.driver} system property
     * @return the newly started driver
     * @throws Exception if {@code path} is {@code null} or empty
     */
    public static WebDriver createChromeWebDriver(String path) throws Exception {
        boolean pathMissing = (path == null) || path.isEmpty();
        if (pathMissing) {
            throw new Exception("配置错误, 没有配置:chrome path");
        }

        System.setProperty("webdriver.chrome.driver", path);
        WebDriver chrome = new ChromeDriver();

        WebDriver.Timeouts timeouts = chrome.manage().timeouts();
        timeouts.pageLoadTimeout(1200, TimeUnit.SECONDS);

        WebDriver.Window window = chrome.manage().window();
        window.setSize(new Dimension(1024, 768));

        return chrome;
    }
}
LogWriter.java
package com.sgcc.gridDispa.utils;

import org.apache.log4j.Logger;

/**
 * Thin static facade over a single log4j {@link Logger}, plus a helper that
 * renders a throwable's stack trace as text.
 */
public class LogWriter {

    private static Logger logger = Logger.getLogger(LogWriter.class);

    /** Logs {@code obj} at ERROR level. */
    public static void error(Object obj) {
        logger.error(obj);
    }

    /** Logs {@code message} at ERROR level together with the throwable {@code obj}. */
    public static void error(Object message, Throwable obj) {
        logger.error(message, obj);
    }

    /** Logs {@code obj} at INFO level. */
    public static void info(Object obj) {
        logger.info(obj);
    }

    /**
     * Renders a throwable as text: its {@code toString()} on the first line,
     * followed by one tab-indented {@code at ...} line per stack frame.
     * Each line ends with the platform line separator.
     *
     * @param e the throwable to render
     * @return the rendered text
     */
    public static String getError(Throwable e) {
        StringBuilder rendered = new StringBuilder();
        rendered.append(e.toString()).append(System.getProperty("line.separator"));
        for (StackTraceElement frame : e.getStackTrace()) {
            rendered.append("\tat ")
                    .append(frame)
                    .append(System.getProperty("line.separator"));
        }
        return rendered.toString();
    }
}
JDBCUtil.java
package com.sgcc.gridDispa.utils;import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.cfg.Configuration;
import org.hibernate.stat.SessionStatistics;
import org.hibernate.stat.Statistics;
/*** 获得hibernate session对象* @author kuang**/
public final class JDBCUtil {private static SessionFactory sessionFactory ;private JDBCUtil(){}static{try{sessionFactory = new Configuration().configure("hibernate.cfg.xml").buildSessionFactory();}catch(Exception e){e.printStackTrace();LogWriter.error(e);}}public static Session getThreadSession(){return sessionFactory.getCurrentSession();}public static Session noOpen(){return getThreadSession();}public static Session open(){getThreadSession().beginTransaction();return getThreadSession();}public static void commit(){getThreadSession().getTransaction().commit();getThreadSession().close();}public static void close(){if(getThreadSession()!=null)getThreadSession().close();}public static void getStatistics(){SessionStatistics ss = getThreadSession().getStatistics();LogWriter.info("SessionStatistics:"+ss);Statistics st = sessionFactory.getStatistics();LogWriter.info("Statistics:"+st);}public static void rollback(){getThreadSession().getTransaction().rollback();}
}
BlobOracleDialect.java
package com.sgcc.gridDispa.utils;

import java.sql.Types;

import org.hibernate.Hibernate;
import org.hibernate.dialect.OracleDialect;

/**
 * Oracle dialect that additionally registers the Hibernate BLOB type for the
 * JDBC type {@link Types#LONGVARBINARY}. It is the dialect configured in
 * hibernate.cfg.xml.
 */
public class BlobOracleDialect extends OracleDialect {

    /** Builds the stock Oracle dialect and then adds the LONGVARBINARY mapping. */
    public BlobOracleDialect() {
        // The superclass constructor runs implicitly before this body.
        final String blobTypeName = Hibernate.BLOB.getName();
        registerHibernateType(Types.LONGVARBINARY, blobTypeName);
    }
}
BasicDaoImpl.java
package com.sgcc.gridDispa.utils;

import java.util.List;

import org.hibernate.HibernateException;
import org.hibernate.Query;
import org.hibernate.SQLQuery;
import org.hibernate.Session;
import org.hibernate.transform.Transformers;

/**
 * Minimal DAO base class built on {@link JDBCUtil}.
 *
 * <p>Every operation runs in its own transaction: it is begun through
 * {@code JDBCUtil.open()}, committed through {@code JDBCUtil.commit()} and the
 * session is closed in a {@code finally} block. A {@link HibernateException}
 * is logged and not rethrown.
 *
 * @param <T> type of the entity handled by {@link #saveOrUpdate(Object)}
 */
public class BasicDaoImpl<T> {

    /**
     * Persists an entity.
     *
     * <p>NOTE(review): despite its name this method calls {@code Session.save},
     * not {@code Session.saveOrUpdate}; confirm which one is intended before
     * changing it.
     *
     * @param t the entity to persist
     */
    public void saveOrUpdate(T t) {
        try {
            JDBCUtil.open().save(t);
            JDBCUtil.commit();
        } catch (HibernateException e) {
            LogWriter.error("Failed to save entity", e);
            rollbackQuietly();
        } finally {
            JDBCUtil.close();
        }
    }

    /**
     * Runs a native SQL query.
     *
     * @param sql the SQL text to execute
     * @return the result list, or {@code null} if a {@link HibernateException} occurred
     */
    public List queryListBySql(String sql) {
        try {
            Session session = JDBCUtil.open();
            SQLQuery sqlQuery = session.createSQLQuery(sql);
            List result = sqlQuery.list();
            JDBCUtil.commit();
            return result;
        } catch (HibernateException e) {
            LogWriter.error("Failed to execute query: " + sql, e);
            rollbackQuietly();
        } finally {
            JDBCUtil.close();
        }
        return null;
    }

    /**
     * Runs a native SQL query and returns every row as a {@code Map} keyed by
     * column alias.
     *
     * @param sql the SQL text to execute
     * @return the list of row maps, or {@code null} if a {@link HibernateException} occurred
     */
    public List queryListMapBySql(String sql) {
        try {
            Session session = JDBCUtil.open();
            SQLQuery sqlQuery = session.createSQLQuery(sql);
            sqlQuery.setResultTransformer(Transformers.ALIAS_TO_ENTITY_MAP);
            List result = sqlQuery.list();
            JDBCUtil.commit();
            return result;
        } catch (HibernateException e) {
            LogWriter.error("Failed to execute query: " + sql, e);
            rollbackQuietly();
        } finally {
            JDBCUtil.close();
        }
        return null;
    }

    /**
     * Filters a scraped (weather) value, mapping invalid and null values to
     * the empty string.
     *
     * @param object the raw value; may be {@code null}
     * @return {@code ""} if the value is {@code null}, equals the text
     *         {@code "null"} or contains {@code "9999"}; otherwise the trimmed value
     */
    protected String filterMothed(String object) {
        // The null check has to come first; it used to sit after the
        // dereferencing calls and could never be reached for a null argument.
        if (object == null) {
            return "";
        }
        if (object.contains("9999")) {
            return "";
        }
        if (object.equals("null")) {
            return "";
        }
        return object.trim();
    }

    /**
     * Rolls back the current transaction after a failure. A failure of the
     * rollback itself is only logged so that it cannot hide the original error.
     */
    private void rollbackQuietly() {
        try {
            JDBCUtil.rollback();
        } catch (RuntimeException rollbackError) {
            LogWriter.error("Rollback failed", rollbackError);
        }
    }
}
impl文件夹下:
LoadsRealTimeImpl.java
package com.sgcc.gridDispa.impl;

import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;

import com.sgcc.gridDispa.po.LoadsHistory;
import com.sgcc.gridDispa.po.LoadsRealTime;
import com.sgcc.gridDispa.utils.BasicDaoImpl;
import com.sgcc.gridDispa.utils.WebDriverUtil;

/**
 * Scrapes the real-time load figures of the grids and provincial companies
 * from an intranet page with Selenium and stores one {@link LoadsRealTime}
 * row per entry.
 */
public class LoadsRealTimeImpl extends BasicDaoImpl implements Runnable {

    /**
     * Location of the chromedriver executable.
     * (The original also carried a commented-out alternative:
     * D:\\基础软件\\Chrome\\Application\\chromedriver.exe)
     */
    private static final String CHROME_DRIVER_PATH = "D:\\chrome\\Chrome\\Application\\chromedriver.exe";

    /** Page the load figures are read from. */
    private static final String SOURCE_URL =
            "http://10.19.13.50:8080//MWWebSite//PROJECT-HOME//exchange//YYJC//AJBZHDPSJ.jsp";

    /** Index of the first and last line of the page body text that hold a data row. */
    private static final int FIRST_DATA_ROW = 49;
    private static final int LAST_DATA_ROW = 85;

    /** Names that are looked up as regional grids (dwjb = '2') rather than provincial companies. */
    private static final String[] REGIONAL_GRIDS = {
        "华北电网", "华东电网", "华中电网", "东北电网", "西北电网", "西南电网"
    };

    private Logger logger = Logger.getLogger(LoadsRealTimeImpl.class);

    /** Thread entry point: logs a banner and runs the import once. */
    @Override
    public void run() {
        logger.info("=======各省实时负荷数据【定时任务】===============");
        saveAllRealTimeLoad();
    }

    /**
     * Writes the real-time load of every province/grid to the database.
     *
     * <p>Opens the source page in Chrome, waits three seconds, splits the body
     * text into lines and stores one record per data line. A line that cannot
     * be parsed is logged and skipped. The browser is always shut down.
     */
    public synchronized void saveAllRealTimeLoad() {
        WebDriver webDriver = null;
        try {
            webDriver = WebDriverUtil.createChromeWebDriver(CHROME_DRIVER_PATH);
            webDriver.get(SOURCE_URL);
            Thread.sleep(3000);
            System.out.println(webDriver.getTitle());
            System.out.println(webDriver.getPageSource());

            WebElement webBody = webDriver.findElement(By.xpath("//body"));
            String[] bodyLines = webBody.getText().split("\n");

            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            int lastRow = Math.min(LAST_DATA_ROW, bodyLines.length - 1);
            for (int i = FIRST_DATA_ROW; i <= lastRow; i++) {
                LoadsRealTime record;
                try {
                    record = buildRecord(bodyLines[i], format);
                } catch (Exception rowError) {
                    // One malformed line must not abort the remaining rows.
                    logger.error("Skipping unparsable row " + i + ": " + bodyLines[i], rowError);
                    continue;
                }
                this.saveOrUpdate(record);
            }
        } catch (Exception e) {
            logger.error("Real-time load import failed", e);
        } finally {
            if (webDriver != null) {
                // quit() rather than close(): ends the whole browser session.
                webDriver.quit();
            }
        }
    }

    /**
     * Turns one line of the page body into a {@link LoadsRealTime} record.
     *
     * <p>Runs of two or more whitespace characters act as field separators.
     * Field 1 is the grid/company name, field 2 the load (stored divided by 10
     * and rounded), field 3 contains the timestamp after its first single quote.
     *
     * @param row    the raw line
     * @param format parser for the {@code yyyy-MM-dd HH:mm:ss} timestamp
     * @return the populated record
     * @throws Exception if the line has too few fields or a field cannot be parsed
     */
    private LoadsRealTime buildRecord(String row, SimpleDateFormat format) throws Exception {
        String[] fields = row.replaceAll("\\s{2,}", ",").trim().split(",");
        if (fields.length < 4) {
            throw new IllegalArgumentException("expected at least 4 fields but found " + fields.length);
        }

        LoadsRealTime record = new LoadsRealTime();
        String deptName = fields[1];
        record.setDeptName(deptName); // grid / company name

        String deptId = lookupDeptId(deptName);
        if (deptId != null) {
            record.setDeptId(deptId);
        }

        // real-time load
        float load = Float.parseFloat(fields[2]) / 10;
        record.setRealtimeLoad(Math.round(load) + "");

        // The 19-character timestamp starts right after the first single quote.
        int quote = fields[3].indexOf("'");
        String rkTime = fields[3].substring(quote + 1, quote + 20);
        record.setRkTie(format.parse(rkTime));
        return record;
    }

    /**
     * Resolves the id of a regional grid or provincial company by name.
     *
     * @param deptName the name as shown on the page
     * @return the {@code COMPANY_ID} of the last matching row, or {@code null}
     *         if the lookup failed or returned no usable row
     */
    private String lookupDeptId(String deptName) {
        List<Map> matches = isRegionalGrid(deptName)
                ? queryAreaIdByname(deptName)
                : queryCompanyIdByname(deptName);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        // As before, when several rows match the last one wins.
        Object companyId = matches.get(matches.size() - 1).get("COMPANY_ID");
        return companyId == null ? null : companyId.toString();
    }

    /** Tells whether {@code name} is one of the regional grids. */
    private static boolean isRegionalGrid(String name) {
        for (String grid : REGIONAL_GRIDS) {
            if (grid.equals(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Doubles single quotes so that scraped text cannot terminate the SQL
     * string literal it is embedded in.
     * NOTE(review): bind parameters would be the proper fix, but the inherited
     * query method only accepts a finished SQL string.
     */
    private static String escapeSqlLiteral(String value) {
        return value.replace("'", "''");
    }

    /**
     * Looks up company ids by provincial company name (dwjb = '3').
     */
    private List queryCompanyIdByname(String companyName) {
        String sql = "select y.company_id from t_company y where y.dwjb='3' ";
        if (companyName != null && !companyName.equals("")) {
            sql += "and y.company_name like '%" + escapeSqlLiteral(companyName) + "%' ";
        }
        return this.queryListMapBySql(sql);
    }

    /**
     * Looks up company ids by regional grid name (dwjb = '2'); the suffix
     * "电网" is removed from the name before matching.
     */
    private List queryAreaIdByname(String companyName) {
        String sql = "select y.company_id from t_company y where y.dwjb='2' ";
        if (companyName != null && !companyName.equals("")) {
            companyName = companyName.replace("电网", "");
            sql += "and y.company_name like '%" + escapeSqlLiteral(companyName) + "%' ";
        }
        return this.queryListMapBySql(sql);
    }
}
四、页面中数据