====== Orphans & Wanted pages ======
http://www.dokuwiki.org/plugin:orphanswanted
Use this plugin to find orphan pages and wanted pages.
OrphansWanted shows which pages are:
* Orphans (the page exists, but it has no links to it)
* Wanted (the page does not exist, but there are link(s) to it elsewhere on the site)
* Valid (the page exists, and it can be reached through a link)
Each table shows the reference count.
===== Evolution =====
==== version 2.5beta ====
**New version proposal** : 2010-02-18, version 2.5beta. \\
Some changes to make it work with the "useslash" configuration option. The function "orph_Check_InternalLinks()" now uses more DokuWiki functions, so that the code keeps working as DokuWiki evolves.
<?php
/**
 * syntax ~~ORPHANSWANTED:<choice>[!<exclude list>]~~ <choice> :: orphans | wanted | valid | all
 * [!<exclude list>] :: optional. prefix each with ! e.g., !wiki!comments:currentyear
* @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
* @author dae@douglasedmunds.com, (changes by Cyrille37 )
* Updated by Andy Webber to include comments from DokuWiki plugin page up to 2008-11-10
*/
// Define the DokuWiki path constants unless they already exist, then load
// the base syntax-plugin class and the directory search helpers.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../../') . '/');
}
if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}
require_once DOKU_PLUGIN . 'syntax.php';
require_once DOKU_INC . 'inc/search.php';
//-------------------------------------
/**
 * search() callback: registers every readable page file in $data and counts
 * the internal links found in its body.
 *
 * @param array  $data  registry keyed by page id, each entry holding
 *                      'exists' and 'links'; modified in place
 * @param string $base  passed through to orph_Check_InternalLinks()
 * @param string $file  path of the entry being visited
 * @param string $type  'd' for a directory, anything else is treated as a file
 * @param int    $lvl   passed through to orph_Check_InternalLinks()
 * @param array  $opts  passed through to orph_Check_InternalLinks()
 * @return bool  true for directories, non-.txt files and registered pages;
 *               false when the ACL check denies read access to the page
 */
function orph_callback_search_wanted(&$data,$base,$file,$type,$lvl,$opts) {
    // Directories are always descended into but never stored, and anything
    // that is not a .txt page file is ignored.
    if ($type == 'd' || !preg_match("/.*\.txt$/", $file)) {
        return true;
    }

    // Count the links contained in the body of this page first.
    orph_Check_InternalLinks($data, $base, $file, $type, $lvl, $opts);

    $page_id = pathID($file);
    if (auth_quickaclcheck($page_id) < AUTH_READ) {
        return false;
    }

    $key = "$page_id";
    if (isset($data[$key])) {
        // A link to this page was recorded earlier; note that the file exists too.
        $data[$key]['exists'] = true;
    } else {
        // First time this id is seen: the page exists and has no links yet.
        $data[$key] = array(
            'exists' => true,
            'links' => 0
        );
    }
    return true;
}
/**
 * Record one occurrence of a link to page $link in $data.
 *
 * @param array  $data  registry keyed by page id, modified in place
 * @param string $link  id of the page the link points to
 */
function orph_handle_link( &$data, $link )
{
    if( ! isset($data[$link]) )
    {
        // Unknown id so far: only a link was found, not the page file.
        $data[$link] = array(
            'exists' => false,
            'links' => 1
        );
        return ;
    }

    // Known id: just count one more link to it.
    ++$data[$link]['links'] ;
}
/**
 * Search for internal wiki links in page $file and count them in $data.
 *
 * The page body is read from $conf['datadir'] . $file. Text inside
 * <nowiki>, %% and <code> spans is dropped first. Every remaining [[...]]
 * target that is not an interwiki link, Windows share, external URL,
 * e-mail address or in-page anchor is resolved to a page id and, when the
 * ACL check grants at least AUTH_READ on it, passed to orph_handle_link().
 *
 * @param array  $data  registry keyed by page id, modified in place
 * @param string $base  not used here
 * @param string $file  page file path, relative to $conf['datadir']
 * @param string $type  not used here
 * @param int    $lvl   not used here
 * @param array  $opts  not used here
 * @return void
 */
function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts )
{
    global $conf;

    $dbg = false ;

    // This function runs once per page file; define the constant only once
    // so that later calls do not trigger a "constant already defined" error.
    if( ! defined('LINK_PATTERN') )
    {
        define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%');
    }

    if( ! preg_match("/.*\.txt$/", $file) )
    {
        return ;
    }

    if( $dbg ) echo $file . "\n" ;

    // Best effort: a page that cannot be read simply contributes no links.
    $body = @file_get_contents($conf['datadir'] . $file);
    if( $body === false )
    {
        return ;
    }

    // Ignore links inside <nowiki>, %% and <code> spans. The matches are
    // non-greedy and may span several lines.
    $ignored_patterns = array(
        '/<nowiki>.*?<\/nowiki>/s',
        '/%%.*?%%/s',
        '/<code\b[^>]*>.*?<\/code>/s'
    );
    $stripped = preg_replace($ignored_patterns, '', $body);
    if( $stripped !== null )
    {
        // preg_replace() returns null on a PCRE error; keep the raw body then.
        $body = $stripped;
    }

    $links = array();
    preg_match_all( LINK_PATTERN, $body, $links );

    // Relative links have to be resolved against the namespace of the page
    // that contains them, not against the root namespace.
    $current_ns = getNS(pathID($file));

    foreach( $links[1] as $link )
    {
        if( $dbg ) echo $link ;

        $is_internal = (0 < strlen(ltrim($link)))
            && ! preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link)          // Interwiki
            && ! preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link)      // Windows Share
            && ! preg_match('#^([a-z0-9\-\.+]+?)://#i',$link)           // external link (accepts all protocols)
            && ! preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link)     // E-Mail (pattern is defined in inc/mail.php)
            && ! preg_match('!^#.+!',$link) ;                           // inside page link (html anchor)

        if( $is_internal )
        {
            // resolve_pageid() rewrites $link into a full page id.
            $pageExists = false;
            resolve_pageid($current_ns, $link, $pageExists);
            //echo 'link='.$link.' '.($pageExists?'EXISTS':'MISS');

            if( (strlen(ltrim($link)) > 0)                      // there IS an id?
                && (auth_quickaclcheck($link) >= AUTH_READ) )   // should be visible to user
            {
                if( $dbg ) echo ' A_LINK' ;
                $link = strtolower( $link );
                orph_handle_link($data, $link);
            }
            else
            {
                if( $dbg ) echo ' EMPTY_OR_FORBIDDEN' ;
            }
        }
        else
        {
            if( $dbg ) echo ' NOT_INTERNAL';
        }

        if( $dbg ) echo "\n\n";
    } // end of foreach link
}
// --------------------
/**
 * DokuWiki syntax plugin rendering the ~~ORPHANSWANTED:...~~ reports.
 *
 * The first token after the colon selects the report (orphans, wanted,
 * valid or all). Every further token, separated by "!", names a namespace
 * whose pages are left out of the report.
 */
class syntax_plugin_orphanswanted extends DokuWiki_Syntax_Plugin {

    /**
     * Return the plugin meta data (author, date, name, description, url).
     *
     * @return array
     */
    function getInfo(){
        return array(
            'author' => 'Doug Edmunds',
            'email'  => 'dae@douglasedmunds.com',
            'date'   => '2010-02-18',
            'name'   => 'OrphansWanted Plugin ver 2.5beta (changes by Cyrille37 )',
            'desc'   => 'Find orphan pages and wanted pages .' . "\n"
                      . 'syntax ~~ORPHANSWANTED:<choice>[!<exclude list>]~~ .' . "\n"
                      . '<choice> :: orphans|wanted|valid|all .' . "\n"
                      . '<exclude list> are optional, start each namespace with !',
            'url'    => 'http://wiki.splitbrain.org/plugin:orphanswanted',
        );
    }

    /**
     * What kind of syntax are we?
     *
     * @return string the mode type; 'substition' (sic) is the spelling
     *                DokuWiki itself uses for this type
     */
    function getType(){
        return 'substition';
    }

    /**
     * What about paragraphs?
     *
     * @return string
     */
    function getPType(){
        return 'normal';
    }

    /**
     * Where to sort in?
     *
     * @return int
     */
    function getSort(){
        return 990;
    }

    /**
     * Connect the ~~ORPHANSWANTED:...~~ pattern to the lexer.
     *
     * @param string $mode parser mode the pattern is registered for
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~ORPHANSWANTED:[0-9a-zA-Z:!]+~~',$mode,'plugin_orphanswanted');
    }

    /**
     * Handle the match: split it into the report type and the excluded
     * namespaces.
     *
     * @param string $match   the matched text, e.g. ~~ORPHANSWANTED:all!wiki~~
     * @param int    $state   not used here
     * @param int    $pos     not used here
     * @param object $handler not used here
     * @return array index 0 is the lower-cased report type (orphans, wanted,
     *               valid, all, or anything else on a syntax error); indexes
     *               1..n are the excluded namespaces. The array is what
     *               render() receives as $data.
     */
    function handle($match, $state, $pos, &$handler){
        // strip the 16 characters of "~~ORPHANSWANTED:" and the closing "~~"
        $match = substr($match, 16, -2);
        // Wolfgang 2007-08-29 suggests commenting out the next line
        $match = strtolower($match);
        // "!" separates the report type from the excluded namespaces
        return explode("!", $match);
    }

    /**
     * Create output.
     *
     * The user needs to add ~~NOCACHE~~ manually to the page to make sure
     * the ACL rules are followed; adding it here would be too late because
     * it would not get parsed any more.
     *
     * @param string $format   output format; only 'xhtml' is handled
     * @param object $renderer renderer whose ->doc receives the report
     * @param array  $data     the array returned by handle()
     * @return bool true when output was produced, false for other formats
     */
    function render($format, &$renderer, $data) {
        if($format != 'xhtml'){
            return false;
        }

        // $data[0] is the report type, $data[1]..[x] are excluded namespaces
        switch ($data[0]){
            case 'orphans':
                $renderer->doc .= $this->orphan_pages($data);
                break;
            case 'wanted':
                $renderer->doc .= $this->wanted_pages($data);
                break;
            case 'valid':
                $renderer->doc .= $this->valid_pages($data);
                break;
            case 'all':
                $renderer->doc .= $this->all_pages($data);
                break;
            default:
                $renderer->doc .= "ORPHANSWANTED syntax error";
        }
        return true;
    }

    /**
     * Scan the whole data directory and build the page/link registry.
     *
     * @return array page id => array('exists' => bool, 'links' => int)
     */
    function orph_collect_data() {
        global $conf;
        $data = array();
        // The callback does not read the options; 'ns' is kept with an
        // explicit null instead of an undefined variable.
        search($data, $conf['datadir'], 'orph_callback_search_wanted', array('ns' => null));
        return $data;
    }

    /**
     * Report of pages that exist but are not linked from anywhere.
     *
     * @param array $params_array the array returned by handle()
     * @return string
     */
    function orphan_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, false, $params_array);
    }

    /**
     * Report of pages that are linked to but do not exist.
     *
     * @param array $params_array the array returned by handle()
     * @return string
     */
    function wanted_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), false, true, $params_array);
    }

    /**
     * Report of pages that exist and are linked to.
     *
     * @param array $params_array the array returned by handle()
     * @return string
     */
    function valid_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, true, $params_array);
    }

    /**
     * All three reports one after the other, built from a single scan.
     *
     * @param array $params_array the array returned by handle()
     * @return string
     */
    function all_pages($params_array) {
        $data = $this->orph_collect_data();

        $result  = "\nOrphans\n";
        $result .= $this->orph_report_table($data, true, false, $params_array);
        $result .= "\nWanted\n";
        $result .= $this->orph_report_table($data, false, true, $params_array);
        $result .= "\nValid\n";
        $result .= $this->orph_report_table($data, true, true, $params_array);
        return $result;
    }

    /**
     * Build one report table.
     *
     * The three reports are selected through the two flags:
     *   orphans = orph_report_table($data, true,  false, $params_array)
     *   wanted  = orph_report_table($data, false, true,  $params_array)
     *   valid   = orph_report_table($data, true,  true,  $params_array)
     *
     * @param array $data         page id => array('exists' => bool, 'links' => int)
     * @param bool  $page_exists  keep entries whose 'exists' equals this flag
     * @param bool  $has_links    keep entries with (true) or without (false) links
     * @param array $params_array the array returned by handle(); entries
     *                            1..n are the excluded namespaces
     * @return string HTML of the table
     */
    function orph_report_table( $data, $page_exists, $has_links, $params_array )
    {
        global $conf;

        // a title column only makes sense for pages that exist
        $show_heading = ($page_exists && $conf['useheading']) ? true : false ;

        // take off $params_array[0], the report type
        $exclude_array = array_slice($params_array, 1);

        $count = 1;

        // for valid html - need to close the <p> that is fed before this
        $output  = '</p>';
        $output .= '<table class="inline"><tr><th> # </th><th> ID </th>'
                 . ($show_heading ? '<th>Title</th>' : '' )
                 . '<th>Links</th></tr>'
                 . "\n" ;

        arsort($data);

        foreach($data as $id => $item)
        {
            if( ! (($item['exists'] == $page_exists) and (($item['links'] <> 0) == $has_links)) )
            {
                continue ;
            }

            // $id is a string that looks like: page, namespace:page, or
            // namespace::page. Drop the last segment (the page itself) and
            // add a trailing ":" to get the namespace prefix.
            $page_namespace = implode(":", array_slice(explode(":", $id), 0, -1)) . ':';

            // show the page unless one of the exclusions blocks it
            $show_it = true;
            foreach ($exclude_array as $exclude_item)
            {
                // === is needed: strpos() returns false when nothing is found
                // and 0 when the exclusion is the beginning of the namespace
                if (strpos($page_namespace, $exclude_item . ":") === 0){
                    $show_it = false;
                    break;
                }
            }

            if( ! $show_it )
            {
                continue ;
            }

            // NOTE(review): the link markup and CSS classes below are
            // reconstructed - confirm them against the active template.
            $output .= '<tr><td>' . $count . '</td><td>'
                     . '<a href="' . wl($id) . '" class="'
                     . ($page_exists ? 'wikilink1' : 'wikilink2') . '">'
                     . hsc($id) . '</a></td>'
                     . ($show_heading ? '<td>' . hsc(p_get_first_heading($id)) . '</td>' : '' )
                     . '<td>' . $item['links']
                     . ($has_links
                         ? ' : <a href="' . wl($id, 'do=backlink') . '" class="wikilink1">Show backlinks</a>'
                         : ''
                       )
                     . "</td></tr>\n";
            $count++;
        }

        $output .= "</table>\n";
        // for valid html - need to reopen a <p>
        $output .= '<p>';

        return $output;
    }
}
?>