plural-comparison.php
Go to the documentation of this file.00001 <?php
00012
// Locate the MediaWiki installation: the MW_INSTALL_PATH environment
// variable wins; otherwise assume it lives three directories above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
require_once "$IP/maintenance/Maintenance.php";
00020
/**
 * Maintenance script that compares the plural rules available from three
 * sources (MediaWiki, Gettext and CLDR) and prints, per language code, which
 * sources define rules and the first number on which they disagree.
 */
class PluralCompare extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script for comparing different plural implementations.';
	}

	/**
	 * Print a table with one row per language code.
	 *
	 * Columns show whether each source has rules for the code: '+' means
	 * the source defines rules itself, '.' (MW column only) means a non-English
	 * fallback language has rules. When at least two sources define rules,
	 * the last column holds the first mismatch found by tryMatch(), if any.
	 */
	public function execute() {
		$mwLanguages = $this->loadMediaWiki();
		$gtLanguages = $this->loadGettext();
		$clLanguages = $this->loadCLDR();

		// Union of every language code known to any source.
		$all = Language::fetchLanguageNames( null, 'all' );
		$allkeys = array_keys( $all + $mwLanguages + $gtLanguages + $clLanguages );
		sort( $allkeys );

		$this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) );
		foreach ( $allkeys as $code ) {
			$mw = isset( $mwLanguages[$code] ) ? '+' : '';
			$gt = isset( $gtLanguages[$code] ) ? '+' : '';
			$cl = isset( $clLanguages[$code] ) ? '+' : '';

			if ( $mw === '' ) {
				// No own rules: mark with '.' if any non-English fallback has some.
				foreach ( Language::getFallbacksFor( $code ) as $fcode ) {
					if ( $fcode !== 'en' && isset( $mwLanguages[$fcode] ) ) {
						$mw = '.';
						// One hit is enough; further fallbacks cannot change the marker.
						break;
					}
				}
			}

			$error = '';
			// Only compare when at least two sources define rules themselves.
			if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), '+' ) > 1 ) {
				$error = $this->tryMatch( $code, $mw, $gtLanguages, $clLanguages );
			}

			$this->output( sprintf( "%12s %-3s %-3s %-4s %s\n", $code, $mw, $gt, $cl, $error ) );
		}
	}

	/**
	 * Evaluate the plural form index for the numbers 0..250 with every
	 * available implementation and report the first disagreement.
	 *
	 * @param string $code Language code
	 * @param string $mws MW column marker from execute(); '' means MediaWiki
	 *   has no rules for this code and is left out of the comparison
	 * @param array $gtLanguages Map of language code => Gettext plural expression
	 * @param array $clLanguages Map of language code => list of CLDR rules
	 * @return string "<number>: <mw> <gettext> <cldr>" for the first mismatch
	 *   ('?' marks an implementation that was not evaluated), or '' if all agree
	 */
	protected function tryMatch( $code, $mws, $gtLanguages, $clLanguages ) {
		$hasMw = ( $mws !== '' );
		// The compiled rules do not depend on the number, so fetch them once.
		$mwRules = $hasMw ? Language::factory( $code )->getCompiledPluralRules() : null;

		$gtExp = false;
		if ( isset( $gtLanguages[$code] ) ) {
			// Drop the statement terminator so the whole expression can be
			// parenthesised: a bare "(int) n != 1" would cast only "n" and
			// return a bool that never strictly equals the other integers.
			$expr = rtrim( $gtLanguages[$code], "; \t" );
			if ( $expr !== '' ) {
				$gtExp = 'return (int) (' . str_replace( 'n', '$i', $expr ) . ');';
			}
		}

		$cldrExp = isset( $clLanguages[$code] ) ? $clLanguages[$code] : false;

		for ( $i = 0; $i <= 250; $i++ ) {
			$mw = $gt = $cl = '?';

			if ( $hasMw ) {
				$mw = CLDRPluralRuleEvaluator::evaluateCompiled( $i, $mwRules );
			}

			if ( $gtExp ) {
				// NOTE(review): eval() runs text taken from the Gettext data file
				// as PHP; that file must be trusted. Expressions with nested
				// ternaries presumably need explicit parentheses because PHP
				// does not associate ?: the way C does - confirm against the data.
				$gt = eval( $gtExp );
			}

			if ( $cldrExp ) {
				$cl = CLDRPluralRuleEvaluator::evaluate( $i, $cldrExp );
			}

			if ( self::comp( $mw, $gt ) && self::comp( $gt, $cl ) && self::comp( $cl, $mw ) ) {
				continue;
			}

			return "$i: $mw $gt $cl";
		}

		return '';
	}

	/**
	 * Compare two plural form results, treating '?' (not evaluated) as a
	 * wildcard that matches anything.
	 *
	 * @param mixed $a
	 * @param mixed $b
	 * @return bool True if either side is '?' or both are identical
	 */
	public static function comp( $a, $b ) {
		return $a === '?' || $b === '?' || $a === $b;
	}

	/**
	 * Read plural rules from an XML file containing <pluralRules> elements
	 * with a "locales" attribute and <pluralRule> children.
	 *
	 * @param string $fileName Path to the XML file
	 * @return array Map of language code => list of rule strings; empty if
	 *   the file could not be loaded
	 */
	protected function loadPluralFile( $fileName ) {
		$doc = new DOMDocument;
		if ( !$doc->load( $fileName ) ) {
			$this->error( "Unable to load plural rules from $fileName" );

			return array();
		}

		$plurals = array();
		foreach ( $doc->getElementsByTagName( 'pluralRules' ) as $ruleset ) {
			$rules = array();
			foreach ( $ruleset->getElementsByTagName( 'pluralRule' ) as $elt ) {
				$rules[] = $elt->nodeValue;
			}

			// Split on whitespace runs so that a missing attribute or doubled
			// spaces do not produce an empty-string language code.
			$codes = preg_split( '/\s+/', $ruleset->getAttribute( 'locales' ), -1, PREG_SPLIT_NO_EMPTY );
			foreach ( $codes as $code ) {
				$plurals[$code] = $rules;
			}
		}

		return $plurals;
	}

	/**
	 * Load the plural rules from languages/data/plurals.xml.
	 *
	 * @return array Map of language code => list of rule strings
	 */
	public function loadCLDR() {
		global $IP;

		return $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
	}

	/**
	 * Load the plural rules from languages/data/plurals.xml, overridden per
	 * language code by those in languages/data/plurals-mediawiki.xml.
	 *
	 * @return array Map of language code => list of rule strings
	 */
	public function loadMediaWiki() {
		global $IP;
		$rules = $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
		$rulesMW = $this->loadPluralFile( "$IP/languages/data/plurals-mediawiki.xml" );

		return array_merge( $rules, $rulesMW );
	}

	/**
	 * Load the Gettext plural expressions from data/plural-gettext.txt, which
	 * holds one "<code><TAB><rule>" entry per line. Everything up to and
	 * including the first "plural=" is stripped from the rule.
	 *
	 * @return array Map of language code => plural expression; empty if the
	 *   file could not be read
	 */
	public function loadGettext() {
		$fileName = __DIR__ . '/../data/plural-gettext.txt';
		$gtData = file_get_contents( $fileName );
		if ( $gtData === false ) {
			$this->error( "Unable to read Gettext plural rules from $fileName" );

			return array();
		}

		$gtLanguages = array();
		foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) {
			$fields = explode( "\t", $line );
			if ( count( $fields ) < 2 ) {
				// Not a "<code><TAB><rule>" line; there is no rule to record.
				continue;
			}
			list( $code, $rule ) = $fields;
			$gtLanguages[$code] = preg_replace( '/^.*?plural=/', '', $rule );
		}

		return $gtLanguages;
	}
}
00157
// Name the script class in $maintClass, then include the file named by the
// DO_MAINTENANCE constant. NOTE(review): that constant is defined outside this
// file, presumably by Maintenance.php - confirm.
$maintClass = "PluralCompare";
require_once( DO_MAINTENANCE );