Difference between revisions of "URL Whitelist MediaWiki Modification"

(Created page with "The following code was created by Daniel Robbins to provide URL Whitelist functionality for MediaWiki. This code allows you to create a page named "URLWhitelist" which contain...")
 
(Created page with "The following code was created by Daniel Robbins to provide URL Whitelist functionality for MediaWiki. This code allows you to create a page named "URLWhitelist" which contain...")
(No difference)

Revision as of 02:52, 28 August 2012

The following code was created by Daniel Robbins to provide URL whitelist functionality for MediaWiki. It lets you create a page named "URLWhitelist" containing a list of domains that users are allowed to link to. Saving an edit that contains a link to any domain not listed on the URLWhitelist page results in a Spam Protection error.

On the URLWhitelist page, the wikitext should contain one domain per line, and sub-paths are fine, such as this:

google.com
foo.bar.com/oni

With the entries above, the whitelist allows http(s)://[subdomain.]google.com/* and http(s)://[subdomain.]foo.bar.com/oni[/optional-path]

The following patch applies to MediaWiki 1.19.1:

--- mediawiki-1.19.1/includes/EditPage.php      2012-06-13 18:22:38.000000000 +0000
+++ public_html/includes/EditPage.php   2012-08-27 04:53:33.073996581 +0000
@@ -1081,7 +1081,9 @@
                # Check for spam
                $match = self::matchSummarySpamRegex( $this->summary );
                if ( $match === false ) {
-                       $match = self::matchSpamRegex( $this->textbox1 );
+                       /* Don't run spam regex matching on the URLWhitelist page itself: */
+                       if ($this->mTitle->getText() != "URLWhitelist")
+                               $match = self::matchSpamRegex( $this->textbox1 );
                }
                if ( $match !== false ) {
                        $result['spam'] = $match;
@@ -1558,7 +1560,7 @@
                global $wgSpamRegex;
                // For back compatibility, $wgSpamRegex may be a single string or an array of regexes.
                $regexes = (array)$wgSpamRegex;
-               return self::matchSpamRegexInternal( $text, $regexes );
+               return self::matchURLWhitelistInternal( $text, $regexes );
        }
 
        /**
@@ -1571,7 +1573,7 @@
        public static function matchSummarySpamRegex( $text ) {
                global $wgSummarySpamRegex;
                $regexes = (array)$wgSummarySpamRegex;
-               return self::matchSpamRegexInternal( $text, $regexes );
+               return self::matchURLWhitelistInternal( $text, $regexes );
        }
 
        /**
@@ -1589,6 +1591,46 @@
                return false;
        }
 
+       /**
+        * Check $text against the URL whitelist, then against the spam regexes.
+        * The wikitext of the "URLWhitelist" page is split on whitespace; each token is
+        * a "domain[/path]" entry. An http(s) URL passes only if its host is an entry or
+        * a subdomain of one, and the entry ends at a host/path boundary, so neither
+        * "http://evil.com/?google.com" nor "http://google.com.evil.com" is accepted.
+        * With no entries at all, every http(s) URL is rejected.
+        * @param $text String: text to check
+        * @param $regexes Array: spam regexes tried when no URL is rejected
+        * @return String|false: matched spam text or a fixed message, false if clean
+        */
+       protected static function matchURLWhitelistInternal( $text, $regexes ) {
+               $a = new Article( Title::newFromText('URLWhitelist'));
+               /* NO_EMPTY: an empty token would become an alternative matching any URL. */
+               $entries = preg_split('/\s+/', $a->getContent(), -1, PREG_SPLIT_NO_EMPTY);
+               $allowed = array();
+               foreach($entries as $entry) {
+                       $entry = rtrim($entry, '/');
+                       /* Quote every regex metacharacter and the "," delimiter, not only ".". */
+                       if ($entry !== '')
+                               $allowed[] = preg_quote($entry, ',');
+               }
+               $httpreg = ',https?://,i';
+               if ( $allowed ) {
+                       /* Optional subdomain labels, an entry, optional port, then a boundary. */
+                       $httpreg = ',https?://(?!(?:[\w-]+\.)*(?:' . implode('|', $allowed) . ')(?::\d+)?(?![\w:@-]|\.[\w-])),i';
+               }
+               if ( preg_match( $httpreg, $text ) ) {
+                       return "non-whitelisted http/https URL";
+               }
+               foreach( $regexes as $regex ) {
+                       $matches = array();
+                       if( preg_match( $regex, $text, $matches ) ) {
+                               return $matches[0];
+                       }
+               }
+               return false;
+       }
+
+
        function setHeaders() {
                global $wgOut, $wgUser;