00001 <?php
00014
// Resolve the MediaWiki installation directory: the MW_INSTALL_PATH
// environment variable wins; otherwise fall back to three directory levels
// above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
require_once "$IP/maintenance/Maintenance.php";
00022
/**
 * Maintenance script that generates statistics about the localisation level
 * of one or more message groups.
 *
 * Three report modes exist:
 *  - plain: one percentage column per requested group (--groups);
 *  - weighted (--most=<scope>): restricted to the languages listed in
 *    $mostSpokenLanguages, with a weighted score per language;
 *  - wmfscore (--wmfscore): only "code;score;" lines keyed by Wikimedia
 *    language code, using the 'wikimedia' weights.
 */
class GroupStatictics extends Maintenance {
	/**
	 * Languages reported on when --most is used.
	 *
	 * Each entry is array( position, speakers, continent ):
	 *  - position is printed in the "Pos." column;
	 *  - speakers is printed in the "Speakers" column (estimate; presumably
	 *    in millions -- TODO confirm against the data source);
	 *  - continent is either 'multiple' or a region name used to build the
	 *    "timezoneregion-<name>" message key.
	 *
	 * @var array
	 */
	public $mostSpokenLanguages = array(
		'en' => array( 1, 1500, 'multiple' ),
		'zh-hans' => array( 2, 1300, 'asia' ),
		'zh-hant' => array( 2, 1300, 'asia' ),
		'hi' => array( 3, 550, 'asia' ),
		'ar' => array( 4, 530, 'multiple' ),
		'es' => array( 5, 500, 'multiple' ),
		'ms' => array( 6, 300, 'asia' ),
		'pt' => array( 7, 290, 'multiple' ),
		'pt-br' => array( 7, 290, 'america' ),
		'ru' => array( 8, 278, 'multiple' ),
		'id' => array( 9, 250, 'asia' ),
		'bn' => array( 10, 230, 'asia' ),
		'fr' => array( 11, 200, 'multiple' ),
		'de' => array( 12, 185, 'europe' ),
		'ja' => array( 13, 132, 'asia' ),
		'fa' => array( 14, 107, 'asia' ),
		'pnb' => array( 15, 104, 'asia' ),
		'tl' => array( 16, 90, 'asia' ),
		'mr' => array( 17, 90, 'asia' ),
		'vi' => array( 18, 80, 'asia' ),
		'jv' => array( 19, 80, 'asia' ),
		'te' => array( 20, 80, 'asia' ),
		'ko' => array( 21, 78, 'asia' ),
		'wuu' => array( 22, 77, 'asia' ),
		'arz' => array( 23, 76, 'africa' ),
		'th' => array( 24, 73, 'asia' ),
		'yue' => array( 25, 71, 'asia' ),
		'tr' => array( 26, 70, 'multiple' ),
		'it' => array( 27, 70, 'europe' ),
		'ta' => array( 28, 66, 'asia' ),
		'ur' => array( 29, 60, 'asia' ),
		'my' => array( 30, 52, 'asia' ),
		'sw' => array( 31, 50, 'africa' ),
		'nan' => array( 32, 49, 'asia' ),
		'kn' => array( 33, 47, 'asia' ),
		'gu' => array( 34, 46, 'asia' ),
		'uk' => array( 35, 45, 'europe' ),
		'pl' => array( 36, 43, 'europe' ),
		'sd' => array( 37, 41, 'asia' ),
		'ha' => array( 38, 39, 'africa' ),
		'ml' => array( 39, 37, 'asia' ),
		'gan-hans' => array( 40, 35, 'asia' ),
		'gan-hant' => array( 40, 35, 'asia' ),
		'hak' => array( 41, 34, 'asia' ),
		'or' => array( 42, 31, 'asia' ),
		'ne' => array( 43, 30, 'asia' ),
		'ro' => array( 44, 28, 'europe' ),
		'su' => array( 45, 27, 'asia' ),
		'az' => array( 46, 27, 'asia' ),
		'nl' => array( 47, 27, 'europe' ),
		'zu' => array( 48, 26, 'africa' ),
		'ps' => array( 49, 26, 'asia' ),
		'ckb' => array( 50, 26, 'asia' ),
		'ku-latn' => array( 50, 26, 'asia' ),
	);

	/**
	 * Score weights per scope. The first level key is a scope name (a valid
	 * value for --most; 'wikimedia' is also used by --wmfscore), the second
	 * level maps a message group id to its weight in the score.
	 *
	 * @var array
	 */
	public $localisedWeights = array(
		'wikimedia' => array(
			'core-0-mostused' => 40,
			'core' => 30,
			'ext-0-wikimedia' => 30
		),
		'fundraiser' => array(
			'ext-di-di' => 16,
			'ext-di-pfpg' => 84,
		),
		'mediawiki' => array(
			'core-0-mostused' => 30,
			'core' => 30,
			'ext-0-wikimedia' => 20,
			'ext-0-all' => 20
		)
	);

	/**
	 * Maps language codes to the code used in the --wmfscore report.
	 *
	 * A code that is not listed is reduced to its part before the first
	 * hyphen. An empty string means the language is left out of the
	 * --wmfscore report entirely.
	 *
	 * @var array
	 */
	public $wikimediaCodeMap = array(
		// Hyphenated codes that are kept as they are
		'bat-smg' => 'bat-smg',
		'cbk-zam' => 'cbk-zam',
		'map-bms' => 'map-bms',
		'nds-nl' => 'nds-nl',
		'roa-rup' => 'roa-rup',
		'roa-tara' => 'roa-tara',

		// Codes reported under a different code
		'be-tarask' => 'be-x-old',
		'gsw' => 'als',
		'ike-cans' => 'iu',
		'ike-latn' => 'iu',
		'lzh' => 'zh-classical',
		'nan' => 'zh-min-nan',
		'vro' => 'fiu-vro',
		'yue' => 'zh-yue',

		// Codes left out of the report
		'als' => '',
		'be-x-old' => '',
		'crh' => '',
		'de-at' => '',
		'de-ch' => '',
		'de-formal' => '',
		'dk' => '',
		'en-au' => '',
		'en-ca' => '',
		'en-gb' => '',
		'es-419' => '',
		'fiu-vro' => '',
		'gan' => '',
		'got' => '',
		'hif' => '',
		'hu-formal' => '',
		'iu' => '',
		'kk' => '',
		'kk-cn' => '',
		'kk-kz' => '',
		'kk-tr' => '',
		'ko-kp' => '',
		'ku' => '',
		'ku-arab' => '',
		'nb' => '',
		'nl-be' => '',
		'nl-informal' => '',
		'ruq' => '',
		'simple' => '',
		'sr' => '',
		'tg' => '',
		'tp' => '',
		'tt' => '',
		'ug' => '',
		'zh' => '',
		'zh-classical' => '',
		'zh-cn' => '',
		'zh-sg' => '',
		'zh-hk' => '',
		'zh-min-nan' => '',
		'zh-mo' => '',
		'zh-my' => '',
		'zh-tw' => '',
		'zh-yue' => '',
	);

	/**
	 * Registers the script description and all command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to generate statistics about the localisation ' .
			'level of one or more message groups.';
		$this->addOption(
			'groups',
			'(optional) Comma separated list of groups',
			false, /*required*/
			true /*has arg*/
		);
		$this->addOption(
			'output',
			'(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
				'text: Text with tabs. Default: default',
			false,
			true
		);
		$this->addOption(
			'skiplanguages',
			'(optional) Comma separated list of languages to be skipped',
			false,
			true
		);
		$this->addOption(
			'skipzero',
			'(optional) Skip languages that do not have any localisation at all'
		);
		$this->addOption(
			'legenddetail',
			'(optional) Page name for legend to be transcluded at the top of the details table',
			false,
			true
		);
		$this->addOption(
			'legendsummary',
			'(optional) Page name for legend to be transcluded at the top of the summary table',
			false,
			true
		);
		$this->addOption(
			'fuzzy',
			'(optional) Add column for fuzzy counts'
		);
		$this->addOption(
			'speakers',
			'(optional) Add column for number of speakers (est.). ' .
				'Only valid when combined with "most"'
		);
		$this->addOption(
			'nol10n',
			'(optional) Do not add localised language name if I18ntags is installed'
		);
		$this->addOption(
			'continent',
			'(optional) Add a continent column. Only available when output is ' .
				'"wiki" or not specified.'
		);
		$this->addOption(
			'summary',
			'(optional) Add a summary with counts and scores per continent category ' .
				'and totals. Only available for a valid "most" value.',
			false,
			true
		);
		$this->addOption(
			'wmfscore',
			'Only output WMF language code and weighted score for all ' .
				'language codes for weighing group "wikimedia" in CSV. This ' .
				'report must keep a stable layout as it is used/will be ' .
				'used in the Wikimedia statistics.'
		);
		$this->addOption(
			'most',
			'(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
				'spoken languages. Skipzero is ignored. If a valid scope is ' .
				'defined, the group list and fuzzy are ignored and the ' .
				'localisation levels are weighted and reported.',
			false,
			true
		);
	}

	/**
	 * Builds and prints the report selected by the command line options.
	 *
	 * Shows the help text and stops when none of the requested message
	 * groups exist.
	 */
	public function execute() {
		$output = $this->getOption( 'output', 'default' );

		// Select an output engine
		switch ( $output ) {
			case 'wiki':
				$out = new wikiStatsOutput();
				break;
			case 'text':
				$out = new textStatsOutput();
				break;
			case 'csv':
				$out = new csvStatsOutput();
				break;
			default:
				$out = new TranslateStatsOutput();
		}

		$skipLanguages = array();
		if ( $this->hasOption( 'skiplanguages' ) ) {
			$skipLanguages = array_map(
				'trim',
				explode( ',', $this->getOption( 'skiplanguages' ) )
			);
		}

		$most = $this->getOption( 'most' );
		$wmfscore = $this->hasOption( 'wmfscore' );

		// Pick the weights in use. --wmfscore always uses the 'wikimedia'
		// weights and CSV output; otherwise a valid --most scope selects them.
		if ( $wmfscore ) {
			$out = new csvStatsOutput();
			$weightTable = $this->localisedWeights['wikimedia'];
		} elseif ( $most && isset( $this->localisedWeights[$most] ) ) {
			$weightTable = $this->localisedWeights[$most];
		} else {
			$weightTable = array();
		}
		$reportScore = count( $weightTable ) > 0;
		$totalWeight = array_sum( $weightTable );

		// Localised column headers are only produced for wiki-syntax output
		$l10n = ( $output === 'wiki' || $output === 'default' ) &&
			!$this->hasOption( 'nol10n' );

		// Fuzzy, continent and speakers columns never apply to every mode
		$showFuzzy = !$reportScore && $this->hasOption( 'fuzzy' );
		$showContinent = !$wmfscore && $this->hasOption( 'continent' );
		$showSpeakers = !$wmfscore && $most && $this->hasOption( 'speakers' );
		$summarise = $reportScore && !$wmfscore && $this->hasOption( 'summary' );
		$skipZero = $this->hasOption( 'skipzero' );

		// In score mode the groups are dictated by the weights, otherwise
		// they come from --groups (empty entries are dropped).
		if ( $reportScore ) {
			$reqGroups = array_keys( $weightTable );
		} else {
			$reqGroups = array_filter(
				array_map( 'trim', explode( ',', $this->getOption( 'groups', '' ) ) ),
				'strlen'
			);
		}

		// List of all groups
		$allGroups = MessageGroups::singleton()->getGroups();

		// Get list of valid groups
		$groups = array();
		foreach ( $reqGroups as $id ) {
			// Underscores in a requested id are treated as spaces
			$id = str_replace( '_', ' ', $id );
			if ( isset( $allGroups[$id] ) ) {
				$groups[$id] = $allGroups[$id];
			} else {
				$this->output( "Unknown group: $id\n" );
			}
		}

		if ( !count( $groups ) ) {
			$this->maybeHelp( true );
			return;
		}

		// One weight per score column, in the same order as $groups, so a
		// missing group cannot shift the weights of the groups after it.
		$weights = array();
		foreach ( array_keys( $groups ) as $groupId ) {
			$weights[] = isset( $weightTable[$groupId] ) ? $weightTable[$groupId] : 0;
		}

		// List of all languages, sorted by code
		$languages = Language::fetchLanguageNames( false );
		ksort( $languages );

		if ( !$wmfscore && $this->hasOption( 'legenddetail' ) ) {
			$out->addFreeText( "{{" . $this->getOption( 'legenddetail' ) . "}}\n" );
		}

		if ( !$wmfscore ) {
			// Output headers
			$out->heading();

			$out->blockstart();

			if ( $most ) {
				$out->element( ( $l10n ? "{{int:translate-gs-pos}}" : 'Pos.' ), true );
			}

			$out->element( ( $l10n ? "{{int:translate-gs-code}}" : 'Code' ), true );
			$out->element( ( $l10n ? "{{int:translate-page-language}}" : 'Language' ), true );
			if ( $showContinent ) {
				$out->element( ( $l10n ? "{{int:translate-gs-continent}}" : 'Continent' ), true );
			}

			if ( $showSpeakers ) {
				$out->element( ( $l10n ? "{{int:translate-gs-speakers}}" : 'Speakers' ), true );
			}

			if ( $reportScore ) {
				$out->element(
					( $l10n ? "{{int:translate-gs-score}}" : 'Score' ) . ' (' . $totalWeight . ')',
					true
				);
			}

			// One column per group; in score mode the header shows the weight
			foreach ( $groups as $g ) {
				$heading = $g->getLabel();
				if ( $reportScore ) {
					$heading .= " (" . $weightTable[$g->getId()] . ")";
				}
				$out->element( $heading, true );
				if ( $showFuzzy ) {
					$out->element( ( $l10n ? "{{int:translate-percentage-fuzzy}}" : 'Fuzzy' ), true );
				}
			}

			$out->blockend();
		}

		// Collect array( invert, count, total ) columns per language
		$rows = array();
		foreach ( array_keys( $groups ) as $groupName ) {
			$stats = MessageGroupStats::forGroup( $groupName );

			foreach ( array_keys( $languages ) as $code ) {
				if ( $this->isLanguageExcluded( $code, $most, $wmfscore, $skipLanguages ) ) {
					continue;
				}

				$total = $stats[$code][MessageGroupStats::TOTAL];
				$translated = $stats[$code][MessageGroupStats::TRANSLATED];

				$rows[$code][] = array( false, $translated, $total );

				if ( $showFuzzy ) {
					$rows[$code][] = array( true, $stats[$code][MessageGroupStats::FUZZY], $total );
				}
			}
		}

		// array( count, summed score ) per continent, and per-WMF-code scores
		$summary = array();
		$wmfscores = array();

		foreach ( $languages as $code => $name ) {
			if ( $this->isLanguageExcluded( $code, $most, $wmfscore, $skipLanguages ) ) {
				continue;
			}

			$columns = isset( $rows[$code] ) ? $rows[$code] : array();

			// Skip languages without any translation when asked to
			if ( $skipZero ) {
				$allZero = true;
				foreach ( $columns as $fields ) {
					if ( intval( $fields[1] ) !== 0 ) {
						$allZero = false;
						break;
					}
				}

				if ( $allZero ) {
					continue;
				}
			}

			// Leading columns; the wmfscore report has none of them
			if ( !$wmfscore ) {
				$out->blockstart();

				// Position
				if ( $most ) {
					$out->element( $this->mostSpokenLanguages[$code][0] );
				}

				// Language code
				$out->element( $code );

				// Language name
				if ( $l10n && function_exists( 'efI18nTagsInit' ) ) {
					$out->element( "{{#languagename:" . $code . "}}" );
				} else {
					$out->element( $name );
				}

				// Continent; empty for languages without continent data
				if ( $showContinent ) {
					if ( isset( $this->mostSpokenLanguages[$code] ) ) {
						$continent = $this->formatContinent(
							$this->mostSpokenLanguages[$code][2],
							$l10n
						);
					} else {
						$continent = '';
					}

					$out->element( $continent );
				}

				// Number of speakers
				if ( $showSpeakers ) {
					$out->element( number_format( $this->mostSpokenLanguages[$code][1] ) );
				}
			}

			// Weighted score
			if ( $reportScore ) {
				$score = 0;

				foreach ( $columns as $i => $fields ) {
					list( , $upper, $total ) = $fields;
					// A group without messages contributes nothing
					if ( $total > 0 && isset( $weights[$i] ) ) {
						$score += ( $weights[$i] * $upper ) / $total;
					}
				}

				// Report a round number
				$score = number_format( $score, 0 );

				if ( $summarise ) {
					$continent = $this->mostSpokenLanguages[$code][2];
					if ( !isset( $summary[$continent] ) ) {
						$summary[$continent] = array( 0, 0 );
					}
					$summary[$continent][0]++;
					$summary[$continent][1] += (int)$score;
				}

				if ( $wmfscore ) {
					// Multiple variants can be used for the same wiki: keep
					// the average score per Wikimedia language code.
					if ( isset( $this->wikimediaCodeMap[$code] ) ) {
						$wmfcode = $this->wikimediaCodeMap[$code];
					} else {
						$codeparts = explode( '-', $code );
						$wmfcode = $codeparts[0];
					}

					if ( isset( $wmfscores[$wmfcode] ) ) {
						$count = $wmfscores[$wmfcode]['count'] + 1;
						$tmpWmfScore = (int)$wmfscores[$wmfcode]['score'];
						$tmpWmfCount = (int)$wmfscores[$wmfcode]['count'];
						$score = ( ( $tmpWmfCount * $tmpWmfScore ) + (int)$score ) / $count;
						$wmfscores[$wmfcode] = array( 'score' => $score, 'count' => $count );
					} else {
						$wmfscores[$wmfcode] = array( 'score' => $score, 'count' => 1 );
					}
				} else {
					$out->element( $score );
				}
			}

			// Percentage columns
			if ( !$wmfscore ) {
				foreach ( $columns as $fields ) {
					list( $invert, $upper, $total ) = $fields;
					$c = $out->formatPercent( $upper, $total, $invert );
					$out->element( $c );
				}

				$out->blockend();
			}
		}

		$out->footer();

		if ( $summarise ) {
			if ( $this->hasOption( 'legendsummary' ) ) {
				$out->addFreeText( "{{" . $this->getOption( 'legendsummary' ) . "}}\n" );
			}

			$out->summaryheading();

			$out->blockstart();

			$out->element( $l10n ? "{{int:translate-gs-continent}}" : 'Continent', true );
			$out->element( $l10n ? "{{int:translate-gs-count}}" : 'Count', true );
			$out->element( $l10n ? "{{int:translate-gs-avgscore}}" : 'Avg. score', true );

			$out->blockend();

			ksort( $summary );

			$totals = array( 0, 0 );

			foreach ( $summary as $key => $values ) {
				$out->blockstart();

				$out->element( $this->formatContinent( $key, $l10n ) );
				$out->element( $values[0] );
				$out->element( number_format( $values[1] / $values[0] ) );

				$out->blockend();

				$totals[0] += $values[0];
				$totals[1] += $values[1];
			}

			// No language may have been reported at all (e.g. --skipzero)
			$average = $totals[0] > 0 ? $totals[1] / $totals[0] : 0;

			$out->blockstart();
			$out->element( $l10n ? "{{int:translate-gs-total}}" : 'Total' );
			$out->element( $totals[0] );
			$out->element( number_format( $average ) );
			$out->blockend();

			$out->footer();
		}

		// Custom output
		if ( $wmfscore ) {
			ksort( $wmfscores );

			foreach ( $wmfscores as $code => $stats ) {
				echo $code . ';' . number_format( $stats['score'] ) . ";\n";
			}
		}
	}

	/**
	 * Tells whether a language is left out of the current report.
	 *
	 * @param string $code Language code
	 * @param string|null $most Value of the --most option, if any
	 * @param bool $wmfscore Whether the --wmfscore report is being produced
	 * @param array $skipLanguages Language codes given with --skiplanguages
	 * @return bool
	 */
	private function isLanguageExcluded( $code, $most, $wmfscore, array $skipLanguages ) {
		// The skip list only applies when not reporting on --most
		if ( !$most && in_array( $code, $skipLanguages ) ) {
			return true;
		}

		// Codes mapped to an empty string are not part of the wmfscore report
		if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
			&& $this->wikimediaCodeMap[$code] === ''
		) {
			return true;
		}

		// With --most only the listed languages are reported
		return $most && !isset( $this->mostSpokenLanguages[$code] );
	}

	/**
	 * Formats a continent identifier from $mostSpokenLanguages for output.
	 *
	 * @param string $region 'multiple' or a region name
	 * @param bool $l10n Whether to emit a wikitext message call instead of
	 *   plain English text
	 * @return string
	 */
	private function formatContinent( $region, $l10n ) {
		if ( $region === 'multiple' ) {
			return $l10n ? "{{int:translate-gs-multiple}}" : 'Multiple';
		}

		return $l10n ? "{{int:timezoneregion-" . $region . "}}" : ucfirst( $region );
	}
}
00675
/**
 * Wiki-syntax statistics output with its own table markup, a separate
 * summary table heading and support for free text. It is the output engine
 * the script falls back to when --output is not "wiki", "text" or "csv".
 */
class TranslateStatsOutput extends wikiStatsOutput {
	/**
	 * Prints the opening line of the full-width statistics table.
	 */
	public function heading() {
		$attributes = array(
			'class="mw-ext-translate-groupstatistics sortable wikitable"',
			'border="2"',
			'cellpadding="4"',
			'cellspacing="0"',
			'style="background-color: #F9F9F9; border: 1px #AAAAAA solid; ' .
				'border-collapse: collapse; clear:both;"',
			'width="100%"',
		);

		echo '{| ' . implode( ' ', $attributes ) . "\n";
	}

	/**
	 * Prints the opening line of the summary table: the same markup as
	 * heading(), but preceded by a newline and without a fixed width.
	 */
	public function summaryheading() {
		$attributes = array(
			'class="mw-ext-translate-groupstatistics sortable wikitable"',
			'border="2"',
			'cellpadding="4"',
			'cellspacing="0"',
			'style="background-color: #F9F9F9; border: 1px #AAAAAA solid; ' .
				'border-collapse: collapse; clear:both;"',
		);

		echo "\n" . '{| ' . implode( ' ', $attributes ) . "\n";
	}

	/**
	 * Prints the given text unchanged.
	 *
	 * @param string $freeText Text to print
	 */
	public function addFreeText( $freeText ) {
		echo $freeText;
	}
}
00699
// Name the maintenance class defined in this file, then include the file
// named by RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes
// $maintClass when this script is the entry point -- defined outside this file).
$maintClass = 'GroupStatictics';
require_once( RUN_MAINTENANCE_IF_MAIN );