Java 存儲過程
Java 存儲過程 · Java 代碼:create or replace and compile java source named syndomain as import java.io.IOExcep…
Java 存儲過程
Java 代碼
create or replace and compile java source named syndomain as import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.ResultSet;
import com.mysql.jdbc.Driver;
import oracle.jdbc.driver.OracleDriver;
import java.text.SimpleDateFormat;
import java.util.Date;
public class SynDomain
{
public static void syn()
{
Connection conn=null ;
Connection conn2=null ;
PreparedStatement psmt=null ;
PreparedStatement psmt2=null ;
try {
//Class.forName("com.mysql.jdbc.Driver");
DriverManager.registerDriver(new
com.mysql.jdbc.Driver());
DriverManager.registerDriver(new
oracle.jdbc.driver.OracleDriver());
//Class.forName("com.mysql.jdbc.Driver");
//從mysql 數(shù)據(jù)庫中同步數(shù)據(jù)到oracle
int i=0;
conn=DriverManager.getConnection("jdbc:mysql://192.168.10.10:3306/ism_jiangsu?autoReconnect=true&characterEncoding=utf8&mysqlEncoding=utf8", "root" , "ism" );
conn2=DriverManager.getConnection("jdbc:oracle:thin:@127.0.0.1:1521:orcl" , "idcqc" , "idcqc" );
SimpleDateFormat sdf=new
SimpleDateFormat("yyyy-MM-dd" );
,String sql="SELECT CONCAT('http://www.',domain) FROM crawl_site where add_time>'" sdf.format(new Date()) "'" ;
String sql2="insert into url_enter(id,url,status) values (URL_ENTER_SEQ.nextval,?,0)";
psmt=conn.prepareStatement(sql);
psmt2=conn2.prepareStatement(sql2);
ResultSet rs=psmt.executeQuery();
while (rs.next()){
String url=rs.getString(1);
psmt2.clearParameters();
psmt2.setString(1,url);
try {
psmt2.execute();
}catch (Exception e){
}
i ;
}
System.out.println(i);
//從oracle 數(shù)據(jù)庫同步iswap 到mysql
i=0;
sql="UPDATE crawl_site SET iswap=1 WHERE domain=?"; sql2="select * from url_enter where iswap=1"; psmt=conn.prepareStatement(sql);
psmt2=conn2.prepareStatement(sql2);
rs=psmt.executeQuery();
while (rs.next()){
String url=rs.getString(1);
url=UrlUtil.GetServerDomain(UrlUtil.extractDomain(url));
psmt.clearParameters();
psmt.setString(1,url);
try {
psmt.execute();
}catch (Exception e){
}
i ;
}
}/*catch (ClassNotFoundException e) {
e.printStackTrace(); e.printStackTrace(); }*/ catch (SQLException e) { }finally {
,if (psmt!=null ){
} if (conn!=null ){ } try { } conn.close(); e.printStackTrace(); } catch (SQLException e) { try { } psmt.close(); e.printStackTrace(); } catch (SQLException e) { if (psmt2!=null ){ try { } } if (conn2!=null ){ } try { } conn2.close(); e.printStackTrace(); } catch (SQLException e) { } psmt2.close(); e.printStackTrace(); } catch (SQLException e) {
}
}
create or replace and compile java source named urlutil as import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * URL/domain helper class. SynDomain.syn() calls
 * UrlUtil.GetServerDomain(UrlUtil.extractDomain(url)) on each URL it reads.
 *
 * NOTE(review): the text of this class is scrambled as it appears here.
 * Statements and string fragments are out of order, string-concatenation
 * operators are missing between the suffix-list fragments, and the loop in
 * getTopDomain is cut off right after "for (int i=0; i". It does not compile
 * as shown and has to be restored from the original source before use; the
 * code below is left untouched.
 *
 * Fragments that are still recognisable:
 *  - dcounts: public static int, initialised to 0; getDcounts(String) calls
 *    GetServerDomain(str) and returns dcounts.
 *  - domainRulesLength1 / domainRulesLength2: String[] tables produced by
 *    calling split("|") on pipe-separated domain-suffix lists. The inline
 *    comments label them as suffixes of length 1 (".com", ".cn", ...) and of
 *    length 2 (".com.cn", ".co.uk", ...).
 *    NOTE(review): split("|") presumably lost an escape during extraction;
 *    confirm against the original.
 *  - getTopDomain(String): lower-cases its argument and splits it on ".";
 *    its comment says a second-level domain is treated as the first-level
 *    domain when querying, and that suffixes of length 2 are matched first.
 *    The rest of the body is missing.
 *  - GetServerDomain(String): returns getTopDomain(str); its comment says it
 *    returns the top-level domain.
 *  - IsNumeric(String): matches the argument against the pattern "[0-9]*".
 *  - isIpAddress(String): compiles a dotted-quad style regex and returns true
 *    when Matcher.find() succeeds on the domain, false otherwise.
 *  - extractDomain(String): appears to return the first regex match of a
 *    host-name pattern in the input, or the input itself when nothing matches.
 *  - The final fragment on the last line is the PL/SQL call spec that
 *    publishes SynDomain.syn() as procedure synDomain; it is not Java.
 */
public class UrlUtil {
public static int dcounts = 0;
,"|.gd|.gs|.hn|.hk|.io|.in|.it|.im|.info|.jp|.kr|.kz|.la|.li|.name "|.tc|.tel|.tv|.tk|.tl|.tw|.th|.us|.uk|.vc|.vg|.ws" "|.其它|.中國|.公司|.網(wǎng)絡(luò)|.政務(wù)|.公益|.中國|").split("|"); |.nz|.nu|.nl|.ma|.me|.ms|.mobi|.ru|.sh|.sg|.sc|.se" //后綴長度為1的域名 public static String[] domainRulesLength1 = (".my|.com|.cn|.net|.org|.gov|.ac|.asia|.ai|.am|.at|.biz|.bz|.ca|.cc|.cd|.ch|.cz|.cm|.cx|.co|.de|.edu|.eu|.es|.fm|.fr|.gg" //后綴長度為2的域名 public static String[]
domainRulesLength2=(".ac.cn|.com.cn|.org.cn|.gov.cn|.edu.cn|.net.cn|.com.hk|.com.tw|.com.sg|.com.au"
// 對域名進(jìn)行處理,如果域名是二級域名,則查詢的時(shí)候按照一級域名處理 public static String getTopDomain(String domain) { domain = domain.toLowerCase(); String []domainArray = domain.split("."); String domainLongEnd = "" ; //改方法返回頂級域名。 public static String GetServerDomain(String str) { } return getTopDomain(str); } public static boolean IsNumeric(String str) { Pattern pattern = Pattern.compile("[0-9]*"); Matcher isNum = pattern.matcher(str); if (!isNum.matches()) { } return true ; return false ; "|.co.kr|.kmac.to|.com.au|.org.nz|.org.au|.co.nz|.sh.cn|.net.ru|. "|.js.cn|.tj.cn|.bj.cn|.cq.cn|.he.cn|.nm.cn|.ln.cn|.jl.cn|.hl.cn| "|.sc.cn|.yn.cn|.xz.cn|.sn.cn|.gs.cn|.qh.cn|.sx.cn|.zj.cn|.hb.cn|co.uk" .ah.cn|.fj.cn|.jx.cn|.sd.cn|.ha.cn|.hn.cn|.gd.cn|.gx.cn|.hi.cn" .gz.cn|.nx.cn|.hk.cn|.xj.cn|.tw.cn|.mo.cn|.yn.cn").split("|");
,{
{
String dingjidomain = "" ; boolean ismatch=false ; //int num=0; //先匹配長度為2的域名 for (int i=0; i System.out.println(GetServerDomain("mail.a.com")); System.out.println(getTopDomain("njhysh.cn" )); System.out.println(getDcounts("www.baidu.com")); // } // public static int getDcounts(String str) { } public static boolean isIpAddress(String domain) { String regex = GetServerDomain(str); return dcounts; "(((d{1,2})|(1d{2})|(2[0-4]d)|(25[0-5])).){3}((d{1,2})|(1d{2})|(2[0-4]d)|(25[0-5]))"; } public static String extractDomain(String str) { } String domain = str; String domainRegex = Pattern pattern = Pattern.compile(domainRegex); Matcher matcher = pattern.matcher(str); if (matcher.find()) { } return domain; domain = matcher.group(); } Pattern p = Pattern.compile(regex); Matcher m = p.matcher(domain); if (m.find()) {// 是IP 地址 return true ; // out.println(domain " 3");// 按照IP 地址的查詢方式查詢 } else { } return false ; "[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(.[a-zA-Z0-9][-a-zA-Z0-9]{0,62}) "; 存儲(chǔ)過程代碼: create or replace procedure synDomain as language java name 'SynDomain.syn()';