8月
31
2007
分類:
最近更新:
2007-08-31
無效連結偵測器 - Brokenlink detector
利用 Ajax 技術實作的無效連結偵測器。基本上,它是純 JavaScript 實作,在 IE 和 Firefox 上都測試過。唯一不是用 JavaScript 實作的外部資源,是供 XMLHttpRequest 讀取其他網頁內容的 proxy。

Brokenlink detector.htm
第525行設定 proxy 的網址。第669行到673行可調整是否偵測外部網域的連結頁面。
<!--?php
// NOTE(review): the "<!--?php" / "?-->" delimiters look like "<?php" / "?>"
// that were rewritten by an HTML serializer; as written, a browser treats
// this whole section as an HTML comment. Confirm before deploying.
//
// Page prologue for running the detector inside a XOOPS module admin page
// (presumably cp_header.php bootstraps the XOOPS control panel - verify).
include '../../../include/cp_header.php';
// Load the module's language file for the configured language, if present.
if ( file_exists("../language/".$xoopsConfig['language']."/main.php") ) {
include "../language/".$xoopsConfig['language']."/main.php";
}
//global $xoopsDB;
//$myts =& MyTextSanitizer::getInstance();
// $pagetitle is assigned here but not read anywhere in the visible code.
// _MD_ADMIN_NAME is presumably defined by the language file above - verify.
$pagetitle = "<h4>"._MD_ADMIN_NAME."</h4>";
xoops_cp_header();
?-->
<script type="text/javascript">
// Brokenlink detector
// Copyright (C) 2006 shirock
// ------------------------------------------------------------------------ //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// You may not change or alter any portion of this comment or credits //
// of supporting developers from this source code or any supporting //
// source code which is considered copyrighted (c) material of the //
// original comment or credit authors. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
// ------------------------------------------------------------------------- //
/**
 * Simple XMLHttpRequest creator. By shirock.
 *
 * Tries the native XMLHttpRequest constructor first, then the two legacy
 * Internet Explorer ActiveX ProgIDs, in that order.
 *
 * @returns {Object|null} the first request object that could be
 *     constructed, or null when none of the three is available.
 */
function NewXmlHttpRequest() {
    // Ordered from most to least preferred; each factory throws when the
    // underlying constructor is missing or refuses the ProgID.
    var factories = [
        function () { return new XMLHttpRequest(); },
        function () { return new ActiveXObject("Msxml2.XMLHTTP"); },
        function () { return new ActiveXObject("Microsoft.XMLHTTP"); }
    ];
    for (var n = 0; n < factories.length; n++) {
        try {
            return factories[n]();
        } catch (unsupported) {
            // This flavour is not available here; try the next one.
        }
    }
    return null;
}
/**
 * Minimal asynchronous HTTP request wrapper (use with `new`).
 *
 * @param {string} method    HTTP method passed to open().
 * @param {?string} url      Request URL; send() does nothing while it is falsy.
 * @param {?Object} callback Object with `success(result)`, `failure(result)`
 *                           and an `argument` property that is echoed back
 *                           in the result.
 * @param {*} postdata       Body passed to the underlying send().
 *
 * The result object handed to the callback carries status, statusText,
 * responseText, responseXML and argument.
 */
function AsyncRequest(method, url, callback, postdata) {
    this.method = method;
    this.url = url;
    this.setUrl = function(value) {
        this.url = value;
    };
    this.callback = callback;
    this.setCallback = function(value) {
        this.callback = value;
    };
    this.postdata = postdata;

    // Creates a fresh request object through NewXmlHttpRequest() on every
    // call and fires it asynchronously.
    this.send = function() {
        if (!this.url) {
            return;
        }
        var request = this;
        var xhr = NewXmlHttpRequest();
        this.HttpRequest = xhr;

        xhr.onreadystatechange = function() {
            // The handler reads request.HttpRequest (not the local xhr) so it
            // always looks at whatever request object is currently attached.
            var current = request.HttpRequest;
            if (current.readyState != 4) {
                return; // not finished yet
            }
            var result = {
                status: current.status,
                statusText: current.statusText,
                responseText: current.responseText,
                responseXML: current.responseXML,
                argument: request.callback.argument
            };
            // Any 2xx status counts as success, everything else as failure.
            var succeeded = current.status >= 200 && current.status < 300;
            if (succeeded) {
                request.callback.success(result);
            }
            else {
                request.callback.failure(result);
            }
        };

        xhr.open(this.method, this.url, true);
        xhr.send(this.postdata);
    };
    return this;
}
/*
HTTP/1.1 status code definitions, keyed by the decimal status code as a
string and mapped to the reason phrase.
Also see RFC 2616 (http://www.faqs.org/rfcs/rfc2616).
By shirock.
*/
AsyncRequest.prototype.HTTPStatusCode = {
    // 1xx - informational
    100: 'Continue',
    101: 'Switching Protocols',

    // 2xx - success
    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    // 3xx - redirection
    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    307: 'Temporary Redirect',

    // 4xx - client error
    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Time-out',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Large',
    415: 'Unsupported Media Type',
    416: 'Requested range not satisfiable',
    417: 'Expectation Failed',

    // 5xx - server error
    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Time-out',
    505: 'HTTP Version not supported'
};
</script>
<style type='text/css'>
/* Worker-slot indicator colours: the script sets a cell's class to
   "status_standby" or "status_busy". */
.status_standby {
color: green;
}
.status_busy {
color: red;
}
/* Layout of the worker indicator table (#transactions_status). */
#transactions_status caption {
width: 300px;
border-bottom: 1px #c0c0c0 double;
}
#transactions_status td {
border: 0;
width: 20px;
text-align: center;
}
/* Result tree rendered inside #brokenlink. */
#brokenlink ul {
display: block;
list-style-position: inside;
}
/* Intentionally empty rule. */
#brokenlink ul li {
}
/* Items whose check succeeded ("ok") or failed ("error"); each gets its
   own bullet image. */
#brokenlink ul .ok {
list-style-image: url(../images/bookopen.gif);
}
#brokenlink ul .error {
color: red;
list-style-image: url(../images/user_logout.gif);
}
/* The URL text of a result item. */
#brokenlink ul li .href {
padding-right: 5px;
}
/* The "ERROR: code, text" annotation appended to failed items. */
#brokenlink ul li .errorMessage {
padding-left: 5px;
border-left: 2px #000 solid;
text-decoration: underline;
color: red;
}
</style>
<!-- Start URL of the crawl; also used by the script as the URL prefix that
     decides which pages have their links followed. -->
<input type='text' id='url' name='url' size='60' maxlength='1024' value='<?=XOOPS_URL?>/'/><br/>
<!-- Crawl depth ("偵測深度"): read by the script into transactions.depth. -->
<label>偵測深度: </label>
<select id='depth' name='depth'>
<option value='1'>1</option>
<option value='2'>2</option>
<option value='3'>3</option>
<option value='4'>4</option>
<option value='5'>5</option>
<option value='6'>6</option>
</select>
<!-- Number of worker slots ("工作數"): passed by the script to
     transactions.setPool(). -->
<label>工作數: </label>
<select id='poolsize' name='poolsize'>
<option value='1'>1</option>
<option value='5'>5</option>
<option value='10'>10</option>
<option value='15'>15</option>
<option value='20'>20</option>
</select>
<!-- Start button; the click handler is attached and detached from script. -->
<button type='button' id='check'>確定</button>
<!-- Worker indicator ("工作指示燈"): the script fills the single row with one
     cell per worker slot and toggles each cell's class. -->
<table id='transactions_status' width='300' border='1' cellspacing='0' style='display: none;'>
<caption>工作指示燈</caption>
<tbody>
<tr>
</tr>
</tbody>
</table>
<!-- Final result tree is rendered here when the crawl completes. -->
<div id='brokenlink'>
</div>
<!-- Running log: one item is appended per finished request. -->
<ul id='s'>
</ul>
<script type='text/javascript'>
/**
 * Render one crawled task, and recursively its child links, into a result
 * list.
 *
 * @param {Object} task   Task with `url`, `level`, `status.code`,
 *                        `status.text` and `childUrls`.
 * @param {Object} ul     List element that receives the new item.
 * @param {number} level  Display depth of this item; controls the left
 *                        padding (20px per level).
 *
 * Child tasks are looked up by URL in the global
 * `transactions.visitedUrls`. A task is expanded only while
 * `task.level >= level`; a page that shows up again deeper than the level
 * at which its task was created is listed but not expanded, which keeps
 * pages that link to each other from recursing forever.
 */
function _append_item(task, ul, level) {
    var item = document.createElement('li');
    ul.appendChild(item);
    item.style.paddingLeft = (20 * level) + 'px';

    // Insert the URL as a text node so characters such as '&' or '<' in it
    // are displayed literally instead of being parsed as markup.
    var spanHref = document.createElement('span');
    item.appendChild(spanHref);
    spanHref.appendChild(document.createTextNode(task.url));
    spanHref.className = 'href';

    // Treat every 2xx status as OK, matching how AsyncRequest decides
    // between the success and failure callbacks.
    var code = task.status.code;
    if (code >= 200 && code < 300) {
        item.className = 'ok';
    }
    else {
        item.className = 'error';
        var spanErrorMessage = document.createElement('span');
        item.appendChild(spanErrorMessage);
        spanErrorMessage.appendChild(document.createTextNode(
            'ERROR: ' + task.status.code + ', ' + task.status.text));
        spanErrorMessage.className = 'errorMessage';
    }

    if (task.childUrls.length > 0 && task.level >= level) {
        // The nested list is appended to the same parent list, right after
        // the item it belongs to.
        var subUl = document.createElement('ul');
        ul.appendChild(subUl);
        var childLevel = level + 1;
        for (var i = 0; i < task.childUrls.length; i++) {
            var visited = transactions.visitedUrls[ task.childUrls[i] ];
            if (visited) {
                // Declared locally: previously this leaked an implicit global.
                var subTask = visited.argument.task;
                _append_item(subTask, subUl, childLevel);
            }
            else {
                window.alert(task.childUrls[i] + ' is not in visitedUrls.');
            }
        }
    }
}
/**
 * Present the final result once the crawl has finished: hide the worker
 * indicator table, append a "Complished!" heading and a fresh list to
 * #brokenlink, render the tree starting from the first completed task, and
 * finally call checkStandby().
 */
function showComplishedQueue() {
    var indicator = document.getElementById('transactions_status');
    indicator.style.display = 'none';

    var container = document.getElementById('brokenlink');

    var heading = document.createElement('h3');
    heading.innerHTML = 'Complished!';
    container.appendChild(heading);

    var tree = document.createElement('ul');
    container.appendChild(tree);

    // complishedQueue[0] is the first task that finished; it becomes the
    // root of the rendered tree.
    var rootTask = transactions.complishedQueue[0];
    _append_item(rootTask, tree, 0);

    checkStandby();
}
/**
 * Set the CSS class of one worker indicator cell.
 *
 * @param {number} index  Position of the cell among the <td> elements of
 *                        #transactions_status.
 * @param {string} stat   State name; the class becomes 'status_' + stat.
 */
function _global_transactions_status_change(index, stat) {
    var cells = document.getElementById('transactions_status')
        .getElementsByTagName('td');
    var cell = cells[index];
    cell.className = 'status_' + stat;
}

/** Mark worker slot `index` as idle. */
function _global_transactions_status_standby(index) {
    var state = 'standby';
    _global_transactions_status_change(index, state);
}

/** Mark worker slot `index` as busy. */
function _global_transactions_status_busy(index) {
    var state = 'busy';
    _global_transactions_status_change(index, state);
}
/**
 * Rebuild the worker indicator row: remove every existing cell from the
 * first row of #transactions_status, then add `size` new cells showing '#',
 * each marked as idle.
 *
 * @param {number|string} size  Number of worker slots to display.
 */
function _global_transactions_status_init( size ) {
    var row = document.getElementById('transactions_status')
        .getElementsByTagName('tr')[0];

    // Keep removing the first cell until the collection returned by
    // getElementsByTagName reports no cells left.
    var staleCells = row.getElementsByTagName('td');
    while (staleCells.length > 0) {
        row.removeChild( staleCells[0] );
    }

    for (var slot = 0; slot < size; slot++) {
        var cell = document.createElement('td');
        row.appendChild(cell);
        cell.innerHTML = '#';
        _global_transactions_status_standby(slot);
    }
}
/**
 * Callback object for one pooled request (use with `new`).
 *
 * AsyncRequest calls `success(o)` or `failure(o)` when the request
 * finishes; `o.argument` is this object's `argument` record, so the
 * handlers can reach the shared crawl state.
 *
 * @param {Object} transactions  The crawl controller. Its taskQueue,
 *     complishedQueue and visitedUrls are captured in `argument` at
 *     construction time, together with the controller itself.
 *
 * `argument.transactionsIndex` (pool slot) and `argument.task` (the Task
 * being fetched) start as null and are filled in by the caller before the
 * request is sent.
 */
function Callback(transactions) {
    this.argument = {
        'taskQueue': transactions.taskQueue,
        'complishedQueue': transactions.complishedQueue,
        'visitedUrls': transactions.visitedUrls,
        'transactions': transactions,
        'transactionsIndex': null,
        'task': null
    };

    // Case-insensitive "text starts with prefix" test on trimmed strings.
    // A plain comparison is used on purpose: building a RegExp from the
    // base URL would misread characters such as '?', '.' or '+'.
    function hasPrefixIgnoreCase(text, prefix) {
        var wanted = String(prefix).replace(/^\s+|\s+$/g, '').toLowerCase();
        var actual = String(text).replace(/^\s+|\s+$/g, '').toLowerCase();
        return actual.substring(0, wanted.length) == wanted;
    }

    /**
     * Handle a successfully fetched page: record it as visited, and - when
     * the page lies under urlBase and the depth limit is not reached -
     * extract its links, resolve them to absolute URLs and queue the ones
     * not seen before.
     */
    this.success = function(o) {
        var task = o.argument.task;
        var url = task.url;
        var crawler = o.argument.transactions;
        var visited = o.argument.visitedUrls;
        visited[ task.url.toString() ] = o;

        // If urlBase is false there is no limit and outer pages may be
        // crawled too, so nothing counts as "outside".
        var isOutSide = true;
        if (crawler.urlBase == false || hasPrefixIgnoreCase(url, crawler.urlBase)) {
            isOutSide = false;
        }

        if (!isOutSide && task.level < crawler.depth) {
            url = url.replace(/^\s*/, '').replace(/\s*$/, ''); // trim

            // baseUrl: the URL without query string, cut back to its last
            // '/', used to resolve relative links. When the only slashes
            // are the "//" after the scheme, the URL is a bare host and
            // just gets a trailing '/'.
            var baseUrl = url.split('?')[0];
            var lastSlash = baseUrl.lastIndexOf('/');
            if (lastSlash == baseUrl.indexOf('//') + 1) {
                baseUrl += '/';
            }
            else {
                baseUrl = baseUrl.substring(0, lastSlash + 1);
            }
            // host: scheme plus authority, without a trailing '/'.
            var host = baseUrl.substring(0, baseUrl.indexOf('/', baseUrl.indexOf('//') + 2));

            // Collect <a ... href=...> tags and window.open(...) calls.
            var anchorTags = o.responseText.match(/<a[^<]*\s+href=(['"]?)[a-z0-9_:\.\/~%-]+.*\1/gi);
            var openCalls = o.responseText.match(/window\.open\((['"]?)[a-z0-9_:\.\/~%-]+.*\1/gi);
            var found = (anchorTags || []).concat(openCalls || []);

            for (var n = 0; n < found.length; n++) {
                var href;
                if (found[n].charAt(0) == '<') {
                    href = /<a[^<]*\s+href=(['"]?)([a-z0-9_:\.\/~%-]+).*\1/i.exec(found[n])[2];
                }
                else {
                    href = /window\.open\((['"]?)([a-z0-9_:\.\/~%-]+).*\1/i.exec(found[n])[2];
                }

                // Does it start with "scheme:" ?
                var parts = href.match(/^\w+:/);
                if (!parts) {
                    // Relative link: resolve against the host for
                    // root-relative paths, otherwise against the directory
                    // of the current page.
                    href = (href.charAt(0) == '/' ? host : baseUrl) + href;
                }
                else {
                    var scheme = parts[0].toLowerCase();
                    if (scheme != "http:" && scheme != "https:") {
                        continue; // skip, not http or https
                    }
                    // A bare "scheme://host" gets a trailing '/' so that it
                    // is keyed the same as "scheme://host/".
                    if (href.lastIndexOf('//') == href.lastIndexOf('/') - 1) {
                        href += '/';
                    }
                }

                this.appendChildUrls(task, href);

                // Skip anything already fetched or already waiting in the
                // queue (queued URLs are marked with `false` below).
                if (Object.prototype.hasOwnProperty.call(visited, href)) {
                    continue;
                }
                visited[ href.toString() ] = false;
                o.argument.taskQueue.push( new Task(task.level + 1, href) );
            } // for each link found
        } // end of link extraction

        this.endTransaction(o);

        // Append a line to the running log.
        var s = document.getElementById('s');
        var item = document.createElement('li');
        item.innerHTML = o.argument.task.url + ': ok.'
            + " childUrls: " + task.childUrls.length;
        s.appendChild(item);
    }; // end of event handler success

    /**
     * Handle a failed request: record it as visited, finish the
     * transaction and append a red line to the running log.
     */
    this.failure = function(o) {
        o.argument.visitedUrls[ o.argument.task.url.toString() ] = o;
        this.endTransaction(o);
        var s = document.getElementById('s');
        var item = document.createElement('li');
        item.style.color = 'red';
        item.innerHTML = o.argument.task.url + ': failure.'
            + " childUrls: " + o.argument.task.childUrls.length;
        s.appendChild(item);
    }; // end of event handler failure

    /** Add href to task.childUrls unless it is already listed. */
    this.appendChildUrls = function(task, href) {
        for (var i = 0; i < task.childUrls.length; i++) {
            if (task.childUrls[i] == href) {
                return;
            }
        }
        task.childUrls.push( href );
    };

    /**
     * Release the pool slot, store the finished task, and check whether
     * the whole crawl is complete.
     */
    this.endTransaction = function(o) {
        // release slot
        o.argument.transactions.poolCheckout( o.argument.transactionsIndex );
        this.appendComplishedTask(o);
        this.checkPoolAndTask(o.argument.transactions);
    }; // end of function Callback.endTransaction

    /**
     * Copy the HTTP status into the task and push it on complishedQueue.
     * The status text comes from AsyncRequest.prototype.HTTPStatusCode
     * (the original author notes that o.statusText always returns 'OK').
     */
    this.appendComplishedTask = function(o) {
        var task = o.argument.task;
        var reason = AsyncRequest.prototype.HTTPStatusCode[ o.status.toString() ];
        task.status.code = o.status;
        task.status.text = reason ? reason : 'Undefined Error!';
        o.argument.complishedQueue.push( task );
    }; // end of function Callback.appendComplishedTask

    /**
     * When no task is waiting and no pool slot is busy, stop the polling
     * timer and show the result - once per crawl, guarded by
     * transactions.done.
     */
    this.checkPoolAndTask = function(transactions) {
        if (transactions.taskQueue.length <= 0
            && transactions.isPoolAllDone() == true)
        {
            window.clearInterval( transactions.interval );
            if (!transactions.done) {
                transactions.done = true;
                showComplishedQueue();
            }
        }
    }; // end of function Callback.checkPoolAndTask
} // end of function Callback
/**
 * One URL to check (use with `new`).
 *
 * @param {number} level  Depth value stored on the task.
 * @param {string} url    URL to fetch.
 *
 * `status.code` / `status.text` start as null, and `childUrls` starts as
 * an empty array.
 */
function Task(level, url) {
    this.level = level;
    this.url = url;
    this.status = { code: null, text: null };
    this.childUrls = [];
}
/**
 * Crawl controller: owns the queue of pending tasks, the list of finished
 * tasks, the map of visited URLs and a fixed-size pool of concurrent
 * requests.
 */
var transactions = {
    /**
     * One pool entry (use with `new`): an AsyncRequest plus the Callback
     * that receives its result. The controller is passed in so the
     * callback can reach the shared state.
     */
    Transaction: function Transaction(transactions) {
        this.transaction = new AsyncRequest( 'GET', null, null, null );
        this.callback = new Callback( transactions ); // pass transactions self.
    },

    // Tasks waiting for a free pool slot.
    taskQueue: [],
    clearTaskQueue: function() {
        this.taskQueue = [];
    },

    // Finished tasks, in completion order.
    complishedQueue: [],
    clearComplishedQueue: function() {
        this.complishedQueue = [];
    },

    // Map from URL to the result recorded for it.
    visitedUrls: {},

    // URL prefix that limits link following; false means no limit.
    urlBase: false,
    // Maximum task level whose links are still followed.
    depth: 1,
    // Set once the final result has been shown.
    done: false,

    // URL of the server-side proxy; it is always called as proxy?url=...
    //proxy: '<?=XOOPS_URL?>/modules/brokenlink/admin/proxy.php',
    proxy: 'proxy.php',

    // Pool slots: null when free, a Transaction while a request is running.
    pool: [ null ],
    // Number of occupied slots.
    poolBusy: 0,

    /**
     * Reset the pool to `size` free slots and rebuild the indicator row.
     * @param {number|string} size  Slot count; strings such as a <select>
     *     value are converted, and anything that is not a positive number
     *     falls back to 1.
     */
    setPool: function(size) {
        var count = parseInt(size, 10);
        if (!(count > 0)) {
            count = 1;
        }
        this.pool = [];
        for (var i = 0; i < count; i++) {
            this.pool.push(null);
        }
        // Must reset the property on this object; a bare `poolBusy = 0`
        // would only create an unrelated global variable.
        this.poolBusy = 0;
        _global_transactions_status_init(count);
    },

    /**
     * Start fetching `task` in the first free slot.
     * @returns {number|boolean} the slot index used, or false when every
     *     slot is busy.
     */
    poolCheckin: function(task) {
        for (var i = 0; i < this.pool.length; i++) {
            if (this.pool[i] != null) {
                continue;
            }
            _global_transactions_status_busy(i);
            var entry = new this.Transaction(this);
            this.pool[i] = entry;
            this.poolBusy++;
            entry.callback.argument.transactionsIndex = i;
            entry.callback.argument.task = task;
            // Encode the target URL so that '&', '#', '?' etc. inside it
            // reach the proxy as part of the single `url` parameter.
            entry.transaction.setUrl( this.proxy + '?url=' + encodeURIComponent(task.url) );
            entry.transaction.setCallback( entry.callback );
            entry.transaction.send();
            return i;
        }
        return false;
    }, // end of function transaction.poolCheckin

    /** Free slot `index` and mark it idle in the indicator row. */
    poolCheckout: function(index) {
        this.pool[ index ] = null;
        this.poolBusy--;
        _global_transactions_status_standby(index);
    },

    /** @returns {boolean} true when no slot is busy. */
    isPoolAllDone: function() {
        return !(this.poolBusy > 0);
    },

    /** @returns {boolean} true when a slot was found for `task`. */
    _getTransaction: function(task) {
        return this.poolCheckin(task) !== false;
    },

    /**
     * Trim the task's URL and fetch it; when no slot is free the task is
     * put back on the queue for a later attempt.
     */
    getPage: function(task) {
        task.url = task.url.replace(/^\s*/, '').replace(/\s*$/, ''); // trim
        if (this._getTransaction(task) == false) {
            this.taskQueue.push(task);
        }
    },

    // Handle of the polling timer started by travel(), or null.
    interval: null,

    /** Drop all crawl state and stop the polling timer. */
    clearAll: function() {
        this.clearTaskQueue();
        this.clearComplishedQueue();
        this.visitedUrls = {};
        this.setPool(1);
        if (this.interval != null) {
            window.clearInterval( this.interval );
        }
        this.interval = null;
        this.done = false;
    },

    /**
     * Take one task off the queue and try to fetch it. On the first call
     * with a non-empty queue this also starts a timer that repeats the
     * step every 500 ms.
     * @returns {boolean} false when the queue was empty, true otherwise.
     */
    travel: function() {
        if (this.taskQueue.length <= 0) {
            return false;
        }
        if (this.interval == null) {
            // Pass a function rather than a code string, so the timer does
            // not depend on evaluating text against a global variable.
            var self = this;
            this.interval = window.setInterval(function () {
                self.travel();
            }, 500);
        }
        var task = this.taskQueue.pop();
        this.getPage(task);
        return true;
    }
};
/**
 * Click handler of the start button: reset the page and the crawl state,
 * then start crawling from the URL typed into #url.
 *
 * @param {*} el  Event argument supplied by the browser; not used.
 * @returns {boolean} always false.
 */
function checkGo(el) {
    var urlTextBox = document.getElementById('url');
    if (urlTextBox.value.length <= 0) {
        return false;
    }
    // Detach this handler while a crawl is running.
    disableCheckClick();

    // Empty the running log and show it.
    var log = document.getElementById('s');
    var logItems = log.getElementsByTagName('li');
    while (logItems.length > 0) {
        log.removeChild(logItems[0]);
    }
    log.style.display = 'block';

    // Remove the heading and result list left over from a previous run.
    var result = document.getElementById('brokenlink');
    var oldHeadings = result.getElementsByTagName('h3');
    if (oldHeadings.length) {
        result.removeChild(oldHeadings[0]);
    }
    var oldLists = result.getElementsByTagName('ul');
    if (oldLists.length) {
        result.removeChild(oldLists[0]);
    }

    document.getElementById('transactions_status').style.display = 'block';
    document.getElementById('check').innerHTML = '執行中...';

    transactions.clearAll();
    transactions.taskQueue.push( new Task(0, urlTextBox.value) );
    // <select>.value is a string; convert explicitly so the crawl depth and
    // the pool size are handled as numbers from here on.
    transactions.depth = parseInt(document.getElementById('depth').value, 10);
    // Only pages under the start URL have their links followed. Setting
    // urlBase to false instead would lift that limit.
    transactions.urlBase = urlTextBox.value;
    transactions.setPool( parseInt(document.getElementById('poolsize').value, 10) );
    transactions.done = false;
    transactions.travel();
    return false;
}
/**
 * Attach checkGo as the click handler of the #check button, using
 * attachEvent when the element offers it and addEventListener otherwise.
 */
function enableCheckClick() {
    var button = document.getElementById('check');
    if (!button.attachEvent) {
        // DOM event model
        button.addEventListener('click', checkGo, false);
        return;
    }
    // IE event model
    button.attachEvent('onclick', checkGo);
}
/**
 * Detach checkGo from the #check button, using detachEvent when the
 * element offers it and removeEventListener otherwise.
 */
function disableCheckClick() {
    var button = document.getElementById('check');
    if (!button.detachEvent) {
        // DOM event model
        button.removeEventListener('click', checkGo, false);
        return;
    }
    // IE event model
    button.detachEvent('onclick', checkGo);
}
/**
 * Put the page into its idle state: re-attach the click handler of the
 * start button, hide the running log and the worker indicator table, and
 * restore the button caption.
 *
 * This is a function declaration (not an immediately-invoked named function
 * expression) because showComplishedQueue() calls checkStandby() by name
 * when a crawl finishes; the name of a function expression is only visible
 * inside that function itself.
 */
function checkStandby() {
    enableCheckClick();
    document.getElementById('s').style.display = 'none';
    document.getElementById('transactions_status').style.display = 'none';
    document.getElementById('check').innerHTML = '確定';
}
// Initialise the page once on load.
checkStandby();
</script>
<!--?php
// Page epilogue matching the xoops_cp_header() call at the top of the page.
// NOTE(review): the "<!--?php" / "?-->" delimiters look like "<?php" / "?>"
// rewritten by an HTML serializer - confirm before deploying.
xoops_cp_footer();
?-->
proxy.php
因為 XMLHttpRequest 受到同源限制,無法直接存取其他網域的資源,所以 Brokenlink detector 需要一個 proxy 以讀取其他網頁的內容。Proxy 可以用任何工具撰寫,只要記住 Brokenlink detector 總是以 ?url=連結網址 的形式調用 proxy。例如: proxy.php?url=連結網址 。
<?php
// Proxy for XmlHttpRequest of JavaScript
// Copyright (C) 2006 shirock
// ------------------------------------------------------------------------ //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// You may not change or alter any portion of this comment or credits //
// of supporting developers from this source code or any supporting //
// source code which is considered copyrighted (c) material of the //
// original comment or credit authors. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
// ------------------------------------------------------------------------- //
/* for Xoops
include '../../../mainfile.php';
if (!defined('XOOPS_ROOT_PATH')) {
header("HTTP/1.0 403", true, 403);
exit();
}
*/
/**
 * Fetch a URL with cURL.
 *
 * @param string $url   URL to fetch.
 * @param array  $opts  Optional overrides, keyed by: 'verifypeer',
 *                      'returntransfer', 'cookiejar', 'cookiefile',
 *                      'connecttimeout', 'failonerror', 'followlocation',
 *                      'post'. When 'post' is true the current $_POST data
 *                      is forwarded.
 * @return object  stdClass with:
 *                 ->header  array (always empty),
 *                 ->text    return value of curl_exec() (the body when
 *                           'returntransfer' is true, otherwise a boolean
 *                           while the body is written to the output),
 *                 ->errno   cURL error number, 0 on success,
 *                 ->error   cURL error message, '' on success.
 */
function ProxyGetPage ($url, $opts) {
    // Create the result object explicitly instead of relying on PHP to
    // auto-create it on first property assignment.
    $response = new stdClass();
    $response->header = array();
    $response->text = false;

    $ch = curl_init($url);
    if ($ch === false) {
        $response->errno = -1;
        $response->error = 'curl_init failed';
        return $response;
    }

    // XOOPS_ROOT_PATH only exists when the XOOPS bootstrap is included;
    // fall back to the system temp directory for standalone use.
    $defaultCookie = (defined('XOOPS_ROOT_PATH')
        ? XOOPS_ROOT_PATH.'/cache/curl_cookie'
        : sys_get_temp_dir().'/curl_cookie');

    curl_setopt($ch, CURLOPT_HTTPHEADER, $response->header);
    // NOTE(security): peer verification is off by default, as in the
    // original; pass 'verifypeer' => true to enable certificate checks.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (isset($opts['verifypeer']) ? $opts['verifypeer'] : false) );
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, (isset($opts['returntransfer']) ? $opts['returntransfer'] : true) );
    //curl_setopt($ch, CURLOPT_VERBOSE, 1); ########### debug
    //curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    curl_setopt($ch, CURLOPT_COOKIEJAR, (isset($opts['cookiejar']) ? $opts['cookiejar'] : $defaultCookie) );
    curl_setopt($ch, CURLOPT_COOKIEFILE, (isset($opts['cookiefile']) ? $opts['cookiefile'] : $defaultCookie) );
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, (isset($opts['connecttimeout']) ? $opts['connecttimeout'] : 30) );
    // With FAILONERROR, HTTP status >= 400 is reported as cURL error 22.
    curl_setopt($ch, CURLOPT_FAILONERROR, (isset($opts['failonerror']) ? $opts['failonerror'] : true) );
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, (isset($opts['followlocation']) ? $opts['followlocation'] : true) );

    if (isset($opts['post']) and $opts['post'] == true) {
        curl_setopt($ch, CURLOPT_POST, true );
        // The option name was previously misspelled (CUROPT_POSTFIELDS).
        curl_setopt($ch, CURLOPT_POSTFIELDS, $_POST);
    }
    else {
        curl_setopt($ch, CURLOPT_POST, false );
    }

    $response->text = curl_exec($ch);
    $response->errno = curl_errno($ch);
    $response->error = ($response->errno ? curl_error($ch) : '');
    curl_close($ch);
    return $response;
}
/*===============================================================*/
/* for Xoops
require_once XOOPS_ROOT_PATH."/kernel/object.php";
$config_handler =& xoops_gethandler('config');
$criteria = new CriteriaCompo(new Criteria('conf_modid', 0));
$criteria->add(new Criteria('conf_catid', XOOPS_CONF_METAFOOTER));
$config =& $config_handler->getConfigs($criteria, true);
if (empty($xoopsUser)) {
header("HTTP/1.0 403", true, 403);
exit();
}
*/
// Request handling: fetch the page named by ?url=... and stream it to the
// client, translating fetch errors into an HTTP error status.
$opts = array (
    'returntransfer' => false, // let cURL write the body straight to the output
    'connecttimeout' => 10
);

// A missing, empty or non-string (e.g. url[]=...) parameter is rejected.
if (!isset($_GET['url']) or !is_string($_GET['url']) or trim($_GET['url']) == '') {
    header("HTTP/1.0 404 ", true, 404);
    exit();
}
$url = trim($_GET['url']);

// The URL comes from the client, so only plain web URLs are accepted;
// this keeps other cURL schemes such as file:// out of reach.
if (!preg_match('#^https?://#i', $url)) {
    header("HTTP/1.0 400 Bad Request", true, 400);
    exit();
}

$response = ProxyGetPage($url, $opts);
if ($response->errno != 0) {
    switch ($response->errno) {
    case 22:
        // cURL error 22: the remote server answered with an HTTP error.
        // The message has the form "<text>: <3-digit status>", possibly
        // followed by more text; pass the remote status on to the client.
        if (preg_match('/^(.+): (\d{3})\b/', $response->error, $ers)) {
            header("HTTP/1.0 {$ers[2]} {$ers[1]}", true, (int) $ers[2]);
        }
        else {
            // Status could not be parsed from the message.
            header("HTTP/1.0 502 Bad Gateway", true, 502);
        }
        break;
    default:
        // Any other fetch problem is reported as 404 with the cURL
        // message (line breaks removed) as reason phrase.
        $reason = str_replace(array("\r", "\n"), ' ', $response->error);
        header("HTTP/1.0 404 {$reason}", true, 404);
        break;
    }
}
?>
樂多舊網址: http://blog.roodo.com/rocksaying/archives/4052641.html