<?php
/**
 * ====== Orphans & Wanted pages ======
 *
 * http://www.dokuwiki.org/plugin:orphanswanted
 *
 * Use this plugin to find orphan pages and wanted pages.
 *
 * OrphansWanted shows which pages are:
 *   * Orphans (the page exists, but it has no links to it)
 *   * Wanted (the page does not exist, but there are link(s) to it elsewhere on the site)
 *   * Valid (the page exists, and it can be reached through a link)
 *
 * Each table shows the reference count.
 *
 * ===== Evolution =====
 *
 * ==== version 2.5beta ====
 *
 * **New version proposal** : 2010-02-18, version 2.5beta. \\
 * Some changes to make it work with the "useslash" configuration option.
 * Changed the function "orph_Check_InternalLinks()" to use more DokuWiki
 * functions, so the code keeps working as DokuWiki evolves.
 *
 * syntax ~~ORPHANSWANTED:<choice>[!<exclude list>]~~
 *   <choice> :: orphans | wanted | valid | all
 *   [!<exclude list>] :: optional. prefix each with ! e.g., !wiki!comments:currentyear
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  dae@douglasedmunds.com, (changes by Cyrille37)
 *
 * Updated by Andy Webber to include comments from DokuWiki plugin page upto 2008-11-10
 */

if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
require_once(DOKU_PLUGIN.'syntax.php');
require_once(DOKU_INC.'inc/search.php');

//-------------------------------------

/**
 * Callback for search(): records every readable page and the links it contains.
 *
 * $data is filled as id => array('exists' => bool, 'links' => int).
 *
 * @param array  $data  result accumulator, modified in place
 * @param string $base  base directory handed over by search()
 * @param string $file  path of the current entry, relative to $base
 * @param string $type  'd' for a directory, anything else is treated as a file
 * @param int    $lvl   recursion depth (unused here)
 * @param array  $opts  search options (unused here)
 * @return bool true for directories, non-.txt files and recorded pages;
 *              false when the current user may not read the page
 */
function orph_callback_search_wanted(&$data,$base,$file,$type,$lvl,$opts) {
    if($type == 'd'){
        return true; // recurse all directories, but we don't store namespaces
    }

    if(!preg_match("/.*\.txt$/", $file)) {
        // Ignore everything but TXT
        return true;
    }

    // search the body of the file for links
    orph_Check_InternalLinks($data,$base,$file,$type,$lvl,$opts);

    // get id of this file
    $id = pathID($file);

    // check ACL
    if(auth_quickaclcheck($id) < AUTH_READ) {
        return false;
    }

    // A link to this page may already have created an entry; do not
    // overwrite its link counter, only note that the file really exists.
    if(isset($data[$id])) {
        $data[$id]['exists'] = true;
    } else {
        $data[$id] = array('exists' => true, 'links' => 0);
    }
    return true;
}

/**
 * Count one link to page $link.
 *
 * @param array  $data accumulator (id => array('exists', 'links')), modified in place
 * @param string $link id of the link target
 * @return void
 */
function orph_handle_link( &$data, $link )
{
    if( isset($data[$link]) ) {
        // Target already known (through its file or an earlier link): count the link
        $data[$link]['links']++;
    } else {
        // First sighting of this target: only a link was found so far, not the file
        $data[$link] = array(
            'exists' => false,
            'links'  => 1
        );
    }
}

/**
 * Search for internal wiki links in page $file and count them in $data.
 *
 * @param array  $data  accumulator (id => array('exists', 'links')), modified in place
 * @param string $base  base directory handed over by search() (unused here)
 * @param string $file  path of the page file, relative to $conf['datadir']
 * @param string $type  entry type (unused here)
 * @param int    $lvl   recursion depth (unused here)
 * @param array  $opts  search options (unused here)
 * @return void
 */
function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts )
{
    global $conf;

    $dbg = false;

    // This function runs once per page; an unguarded define() would complain
    // about redefining the constant from the second page on.
    if( ! defined('LINK_PATTERN') ) {
        define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%');
    }

    if( ! preg_match("/.*\.txt$/", $file) ) {
        return;
    }

    if( $dbg ) echo '<p><b>' . hsc($file) . '</b><br/>';

    $body = @file_get_contents($conf['datadir'] . $file);
    if( $body === false ) {
        // Unreadable file: nothing to scan
        return;
    }

    // Relative links must be resolved against the namespace of the page that
    // contains them, not against the root namespace.
    $currentNS = getNS(pathID($file));

    // ignores entries in <nowiki>, %%, and <code>
    foreach( array(
                '/<nowiki>.*<\/nowiki>/',
                '/%%.*%%/',
                '/<code>.*<\/code>/'
            ) as $ignored )
    {
        $body = preg_replace($ignored, '', $body);
    }

    $links = array();
    preg_match_all( LINK_PATTERN, $body, $links );

    foreach( $links[1] as $link )
    {
        if( $dbg ) echo hsc($link);

        $isInternal =
            (0 < strlen(ltrim($link)))
            and ! preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link)          // Interwiki
            and ! preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link)      // Windows Share
            and ! preg_match('#^([a-z0-9\-\.+]+?)://#i',$link)           // external link (accepts all protocols)
            and ! preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link)     // E-Mail (pattern is defined in inc/mail.php)
            and ! preg_match('!^#.+!',$link);                            // inside page link (html anchor)

        if( $isInternal )
        {
            $pageExists = false;
            resolve_pageid($currentNS, $link, $pageExists);

            if( (strlen(ltrim($link)) > 0)                      // there IS an id?
                and auth_quickaclcheck($link) >= AUTH_READ )    // should be visible to user
            {
                if( $dbg ) echo ' A_LINK';
                $link = strtolower( $link );
                orph_handle_link($data, $link);
            }
            else
            {
                if( $dbg ) echo ' EMPTY_OR_FORBIDDEN';
            }
        }
        else
        {
            // empty link, or not a local wiki link
            if( $dbg ) echo ' NOT_INTERNAL';
        }

        if( $dbg ) echo "<br />\n";
    } // end of foreach link
}

// --------------------

/**
 * All DokuWiki plugins to extend the parser/rendering mechanism
 * need to inherit from this class
 */
class syntax_plugin_orphanswanted extends DokuWiki_Syntax_Plugin {

    /**
     * return some info
     */
    function getInfo(){
        return array(
            'author' => 'Doug Edmunds',
            'email'  => 'dae@douglasedmunds.com',
            'date'   => '2010-02-18',
            'name'   => 'OrphansWanted Plugin ver 2.5beta (changes by Cyrille37)',
            'desc'   => 'Find orphan pages and wanted pages . syntax ~~ORPHANSWANTED:<choice>[!<excluded namespaces>]~~ . <choice> :: orphans|wanted|valid|all . <excluded namespaces> are optional, start each namespace with !',
            'url'    => 'http://wiki.splitbrain.org/plugin:orphanswanted',
        );
    }

    /**
     * What kind of syntax are we?
     */
    function getType(){
        return 'substition';
    }

    /**
     * What about paragraphs?
     */
    function getPType(){
        return 'normal';
    }

    /**
     * Where to sort in?
     */
    function getSort(){
        return 990;
    }

    /**
     * Connect pattern to lexer
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~ORPHANSWANTED:[0-9a-zA-Z:!]+~~',$mode,'plugin_orphanswanted');
    }

    /**
     * Handle the match
     *
     * @return array [0] is the report type (orphans, wanted, valid, all, or a
     *               syntax error); [1]..[x] are the excluded namespaces.
     *               The array arrives in render() as its $data parameter.
     */
    function handle($match, $state, $pos, &$handler){
        // strip ~~ORPHANSWANTED: (16 chars) from start and ~~ from end
        $match = substr($match,16,-2);

        // Wolfgang 2007-08-29 suggests commenting out the next line
        $match = strtolower($match);

        // create array, using ! as separator
        return explode("!", $match);
    }

    /**
     * Create output
     *
     * The user needs to add ~~NOCACHE~~ manually to the page to assure ACL
     * rules are followed; adding it here is too late, it doesn't get parsed.
     *
     * @param string $format   output format; only 'xhtml' is handled
     * @param object $renderer renderer whose ->doc receives the report
     * @param array  $data     [0] report type, [1]..[x] excluded namespaces
     * @return bool true when output was produced, false for other formats
     */
    function render($format, &$renderer, $data) {
        if($format != 'xhtml'){
            return false;
        }

        switch ($data[0]){
            case 'orphans':
                $renderer->doc .= $this->orphan_pages($data);
                break;
            case 'wanted':
                $renderer->doc .= $this->wanted_pages($data);
                break;
            case 'valid':
                $renderer->doc .= $this->valid_pages($data);
                break;
            case 'all':
                $renderer->doc .= $this->all_pages($data);
                break;
            default:
                // syntax ~~ORPHANSWANTED:<choice>~~  <choice> :: orphans|wanted|valid|all
                // Ex: ~~ORPHANSWANTED:valid~~
                $renderer->doc .= "ORPHANSWANTED syntax error";
        }
        return true;
    }

    /**
     * Walk the whole data directory and collect page/link information.
     *
     * @return array id => array('exists' => bool, 'links' => int)
     */
    private function orph_collect_data() {
        global $conf;

        $data = array();
        // The callback ignores its options; 'ns' is passed as null, which is
        // what the previously undefined $ns evaluated to.
        search($data,$conf['datadir'],'orph_callback_search_wanted',array('ns' => null));
        return $data;
    }

    // three choices
    // $params_array used to extract excluded namespaces for report
    // orphans = orph_report_table($data, true,  false, $params_array);
    // wanted  = orph_report_table($data, false, true,  $params_array);
    // valid   = orph_report_table($data, true,  true,  $params_array);

    /**
     * Report of pages that exist but are never linked to.
     */
    function orphan_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, false, $params_array);
    }

    /**
     * Report of pages that are linked to but do not exist.
     */
    function wanted_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), false, true, $params_array);
    }

    /**
     * Report of pages that exist and are linked to.
     */
    function valid_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, true, $params_array);
    }

    /**
     * All three reports, each under its own heading, from a single scan.
     */
    function all_pages($params_array) {
        $data = $this->orph_collect_data();

        // Each heading closes the paragraph opened before it and reopens one after it
        $result  = "</p><h1>Orphans</h1><p>";
        $result .= $this->orph_report_table($data, true, false, $params_array);
        $result .= "</p><h1>Wanted</h1><p>";
        $result .= $this->orph_report_table($data, false, true, $params_array);
        $result .= "</p><h1>Valid</h1><p>";
        $result .= $this->orph_report_table($data, true, true, $params_array);

        return $result;
    }

    /**
     * Build the HTML table for one report.
     *
     * @param array $data         id => array('exists' => bool, 'links' => int)
     * @param bool  $page_exists  list only entries whose 'exists' equals this
     * @param bool  $has_links    list only entries that have (true) / lack (false) links
     * @param array $params_array [0] report type, [1]..[x] namespaces to exclude
     * @return string HTML fragment; closes the surrounding <p> and reopens one
     */
    function orph_report_table( $data, $page_exists, $has_links, $params_array )
    {
        global $conf;

        $show_heading = ($page_exists && $conf['useheading']) ? true : false;

        // take off $params_array[0]
        $exclude_array = array_slice($params_array,1);

        $count = 1;

        // for valid html - need to close the <p> that is fed before this
        $output  = '</p>';
        $output .= '<table class="inline"><tr><th> # </th><th> ID </th>'
                    . ($show_heading ? '<th>Title</th>' : '' )
                    . '<th>Links</th></tr>'
                    . "\n";

        // Reverse sort of the entries, keeping the ids as keys
        arsort($data);

        foreach($data as $id=>$item)
        {
            if( ! (($item['exists'] == $page_exists) and (($item['links'] <> 0) == $has_links)) ) {
                continue;
            }

            // $id is a string, looks like this: page, namespace:page, or namespace::page
            // Drop the last element (the page name) and add a trailing ':'
            $match_array = explode(":", $id);
            $match_array = array_slice($match_array, 0, -1);
            $page_namespace = implode(":", $match_array) . ':';

            // set it to show, unless blocked by exclusion list
            $show_it = true;
            foreach ($exclude_array as $exclude_item)
            {
                // need === to avoid boolean false
                // if exclusion is beginning of page's namespace, block it
                if (strpos($page_namespace, $exclude_item . ":") === 0) {
                    $show_it = false;
                    break;
                }
            }

            if( $show_it )
            {
                $output .= "<tr><td>$count</td><td><a href=\"" . wl($id)
                            . "\" class=\"" . ($page_exists ? "wikilink1" : "wikilink2") . "\" >"
                            . hsc($id) . '</a></td>'
                            . ($show_heading ? '<td>' . hsc(p_get_first_heading($id)) . '</td>' : '' )
                            . '<td>' . $item['links']
                            . ($has_links
                                ? "&nbsp;:&nbsp;<a href=\"" . wl($id, 'do=backlink')
                                  . "\" class=\"wikilink1\">Show&nbsp;backlinks</a>"
                                : '' )
                            . "</td></tr>\n";

                $count++;
            }
        }

        $output .= "</table>\n";
        // for valid html - need to reopen a <p>
        $output .= '<p>';

        return $output;
    }
}