plural-comparison.php

Go to the documentation of this file.
00001 <?php
// Standard boilerplate to define $IP: prefer the MW_INSTALL_PATH environment
// variable and fall back to a path three levels above this script's directory.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
    $dir = __DIR__;
    $IP = "$dir/../../..";
}
00019 require_once "$IP/maintenance/Maintenance.php";
00020 
/**
 * Maintenance script that compares the plural rules known to three sources
 * (MediaWiki, gettext and CLDR) and prints one table row per language code.
 */
class PluralCompare extends Maintenance {
    public function __construct() {
        parent::__construct();
        $this->mDescription = 'Script for comparing different plural implementations.';
    }

    /**
     * Print the comparison table.
     *
     * Each row holds the language code followed by one column per source.
     * '+' means the source has rules for that code. In the MW column, '.'
     * means there are no direct rules but a fallback language other than
     * 'en' has them. When more than one column is '+', the last column
     * holds the first mismatch reported by tryMatch(), if any.
     */
    public function execute() {
        $mwLanguages = $this->loadMediaWiki();
        $gtLanguages = $this->loadGettext();
        $clLanguages = $this->loadCLDR();

        $all = Language::fetchLanguageNames( null, 'all' );
        $allkeys = array_keys( $all + $mwLanguages + $gtLanguages + $clLanguages );
        sort( $allkeys );

        $this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) );
        foreach ( $allkeys as $code ) {
            $mw = isset( $mwLanguages[$code] ) ? '+' : '';
            $gt = isset( $gtLanguages[$code] ) ? '+' : '';
            $cl = isset( $clLanguages[$code] ) ? '+' : '';

            if ( $mw === '' ) {
                $fallbacks = Language::getFallbacksFor( $code );
                foreach ( $fallbacks as $fcode ) {
                    if ( $fcode !== 'en' && isset( $mwLanguages[$fcode] ) ) {
                        $mw = '.';
                        // One matching fallback is enough; the marker cannot change further.
                        break;
                    }
                }
            }

            $error = '';
            // Only compare when at least two sources define rules directly.
            if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), '+' ) > 1 ) {
                $error = $this->tryMatch( $code, $mw, $gtLanguages, $clLanguages );
            }

            $this->output( sprintf( "%12s %-3s %-3s %-4s %s\n", $code, $mw, $gt, $cl, $error ) );
        }
    }

    /**
     * Evaluate every available plural implementation for the numbers 0..250
     * and report the first number on which they disagree.
     *
     * @param string $code Language code being checked
     * @param string $mws MediaWiki column marker; any non-empty value enables the MediaWiki check
     * @param array $gtLanguages Map of language code to gettext plural expression
     * @param array $clLanguages Map of language code to list of CLDR plural rules
     * @return string "<number>: <mw> <gettext> <cldr>" for the first mismatch, or '' when all agree
     */
    protected function tryMatch( $code, $mws, $gtLanguages, $clLanguages ) {
        $mwExp = $mws !== '';
        $mwRules = null;
        if ( $mwExp ) {
            // The compiled rules do not depend on the tested number, so fetch them once.
            $mwRules = Language::factory( $code )->getCompiledPluralRules();
        }

        $gtExp = false;
        if ( isset( $gtLanguages[$code] ) ) {
            // Drop surrounding whitespace and any trailing ';' so the expression can be wrapped.
            $expr = rtrim( trim( $gtLanguages[$code] ), "; \t" );
            if ( $expr !== '' ) {
                // The parentheses matter: a cast binds tighter than comparison operators, so
                // without them "(int) $i != 1" would return a boolean instead of an index.
                // NOTE(review): unparenthesised nested ternaries group differently in PHP
                // than in C (and are rejected by PHP 8); the data file must parenthesise them.
                $gtExp = 'return (int) (' . str_replace( 'n', '$i', $expr ) . ');';
            }
        }

        $cldrExp = isset( $clLanguages[$code] ) ? $clLanguages[$code] : false;

        for ( $i = 0; $i <= 250; $i++ ) {
            // '?' marks a source with no result; comp() treats it as matching anything.
            $mw = $gt = $cl = '?';

            if ( $mwExp ) {
                $mw = CLDRPluralRuleEvaluator::evaluateCompiled( $i, $mwRules );
            }

            if ( $gtExp ) {
                // SECURITY: eval() runs text read from the gettext data file; that file
                // must only ever contain trusted content.
                $gt = eval( $gtExp );
            }

            if ( $cldrExp ) {
                $cl = CLDRPluralRuleEvaluator::evaluate( $i, $cldrExp );
            }

            if ( self::comp( $mw, $gt ) && self::comp( $gt, $cl ) && self::comp( $cl, $mw ) ) {
                continue;
            }

            return "$i: $mw $gt $cl";
        }

        return '';
    }

    /**
     * Compare two results, treating the placeholder '?' as a wildcard.
     *
     * @param mixed $a
     * @param mixed $b
     * @return bool True when either value is '?' or both are strictly identical
     */
    public static function comp( $a, $b ) {
        return $a === '?' || $b === '?' || $a === $b;
    }

    /**
     * Read an XML file and collect the text of the pluralRule elements of
     * every pluralRules element, keyed by each code in its "locales" attribute.
     *
     * @param string $fileName Path of the XML file
     * @return array Map of language code to list of rule strings; empty when the file cannot be loaded
     */
    protected function loadPluralFile( $fileName ) {
        $plurals = array();

        $doc = new DOMDocument;
        if ( !$doc->load( $fileName ) ) {
            // Report the failure instead of silently behaving as if no rules exist.
            $this->error( "Unable to load plural rules from $fileName" );

            return $plurals;
        }

        foreach ( $doc->getElementsByTagName( "pluralRules" ) as $ruleset ) {
            $rules = array();
            foreach ( $ruleset->getElementsByTagName( "pluralRule" ) as $elt ) {
                $rules[] = $elt->nodeValue;
            }

            // Split on any whitespace run and skip empties so that a missing attribute or
            // repeated separators never produce an empty-string language code.
            $codes = preg_split( '/\s+/', $ruleset->getAttribute( 'locales' ), -1, PREG_SPLIT_NO_EMPTY );
            foreach ( $codes as $code ) {
                $plurals[$code] = $rules;
            }
        }

        return $plurals;
    }

    /**
     * Load the rules from languages/data/plurals.xml under $IP.
     *
     * @return array Map of language code to list of rule strings
     */
    public function loadCLDR() {
        global $IP;

        return $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
    }

    /**
     * Load the rules from plurals.xml and plurals-mediawiki.xml under $IP;
     * entries from plurals-mediawiki.xml replace those from plurals.xml.
     *
     * @return array Map of language code to list of rule strings
     */
    public function loadMediaWiki() {
        global $IP;
        $rules = $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
        $rulesMW = $this->loadPluralFile( "$IP/languages/data/plurals-mediawiki.xml" );

        return array_merge( $rules, $rulesMW );
    }

    /**
     * Load the gettext plural expressions from data/plural-gettext.txt.
     *
     * Each non-empty line is expected to be "<code><TAB><rule>"; everything
     * in the rule up to and including the first "plural=" is removed.
     *
     * @return array Map of language code to plural expression
     */
    public function loadGettext() {
        $gtLanguages = array();

        $fileName = __DIR__ . '/../data/plural-gettext.txt';
        $gtData = file_get_contents( $fileName );
        if ( $gtData === false ) {
            $this->error( "Unable to read gettext plural data from $fileName" );

            return $gtLanguages;
        }

        foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) {
            // Limit to two parts so the rule is kept whole, and skip lines without a
            // tab rather than raising an undefined-offset notice and storing a null rule.
            $parts = explode( "\t", $line, 2 );
            if ( count( $parts ) < 2 ) {
                continue;
            }

            list( $code, $rule ) = $parts;
            $gtLanguages[$code] = preg_replace( '/^.*?plural=/', '', $rule );
        }

        return $gtLanguages;
    }
}
00157 
// Name the maintenance class, then pull in the runner script.
// NOTE(review): presumably the file behind DO_MAINTENANCE reads $maintClass
// to decide which class to run - confirm against Maintenance.php.
$maintClass = 'PluralCompare';
require_once( DO_MAINTENANCE );
Generated on Tue Oct 29 00:00:24 2013 for MediaWiki Translate Extension by  doxygen 1.6.3