00001 <?php
/**
 * Signals that a message could not be split into gettext plural forms.
 *
 * Thrown by GettextFFS::splitPlural() when the text contains no
 * {{PLURAL:GETTEXT|...}} construct, and caught by
 * GettextFFS::formatMessageBlock(), which then flags the entry as
 * invalid-plural.
 *
 * The parent class name is spelled exactly like everywhere else in this file
 * (MWException) so that case-sensitive autoload maps can resolve it.
 */
class GettextPluralException extends MWException {
}
00017
00022 class GettextFFS extends SimpleFFS {
/**
 * Reports the fuzzy capability of this file format.
 *
 * @return string Always the string 'yes'.
 */
public function supportsFuzzy() {
	$answer = 'yes';

	return $answer;
}
00026
/**
 * Lists the file name extensions handled by this format.
 *
 * @return string[] Template extension first, then translation extension.
 */
public function getFileExtensions() {
	$extensions = array();
	$extensions[] = '.pot';
	$extensions[] = '.po';

	return $extensions;
}
00030
/**
 * Offline mode flag. When set, the writer emits the message key as msgctxt,
 * adds X-Language-Code / X-Message-Group headers and includes wiki
 * documentation comments (see doGettextHeader(), formatMessageBlock() and
 * formatDocumentation()).
 */
protected $offlineMode = false;

/**
 * Switches offline mode on or off.
 *
 * @param bool $value New value for the offline mode flag; stored as given.
 */
public function setOfflineMode( $value ) {
	$this->offlineMode = $value;
}
00039
/**
 * Parses gettext file contents and adds the author list to the result.
 *
 * Messages whose value is the empty string are removed from the MESSAGES
 * list; the TEMPLATE list is left untouched.
 *
 * @param string $data Contents of a gettext file.
 * @return array Result of parseGettext() plus an AUTHORS key.
 */
public function readFromVariable( $data ) {
	# Authors come from "# Author: name" comment lines
	$authorMatches = array();
	preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $authorMatches );

	# Then messages and everything else
	$result = $this->parseGettext( $data );
	$result['AUTHORS'] = $authorMatches[1];

	# Drop entries that have no value at all
	$emptyKeys = array_keys( $result['MESSAGES'], '', true );
	foreach ( $emptyKeys as $emptyKey ) {
		unset( $result['MESSAGES'][$emptyKey] );
	}

	return $result;
}
00058
/**
 * Parses gettext data using the options configured for this instance.
 *
 * Reads the optional 'CtxtAsKey' and 'keyAlgorithm' entries of $this->extra
 * and the mangler of $this->group, then delegates to parseGettextData().
 *
 * @param string $data Contents of a gettext file.
 * @return array See parseGettextData().
 */
public function parseGettext( $data ) {
	$mangler = $this->group->getMangler();

	// Missing or falsy option means: generate keys instead of using msgctxt
	$useCtxtAsKey = !empty( $this->extra['CtxtAsKey'] );

	$keyAlgorithm = isset( $this->extra['keyAlgorithm'] )
		? $this->extra['keyAlgorithm']
		: 'legacy';

	return self::parseGettextData( $data, $useCtxtAsKey, $mangler, $keyAlgorithm );
}
00069
/**
 * Parses the contents of a gettext file.
 *
 * The data is split into sections on empty lines. The first section is the
 * header; every following section is handed to parseGettextSection().
 *
 * @param string $data Contents of a gettext file.
 * @param bool $useCtxtAsKey Use msgctxt as the message key instead of a
 *   key generated with generateKeyFromItem().
 * @param object $mangler Object whose mangle() method is applied to each key.
 * @param string $keyAlgorithm Algorithm name passed to generateKeyFromItem().
 * @throws MWException When the header section contains no msgstr.
 * @return array With the keys MESSAGES (key => text), TEMPLATE (key => parsed
 *   item), METADATA and HEADERS.
 */
public static function parseGettextData( $data, $useCtxtAsKey, $mangler, $keyAlgorithm ) {
	$potmode = false;

	// Normalise newlines, to make processing easier
	$data = str_replace( "\r\n", "\n", $data );

	// Sections are separated by one or more empty lines
	$sections = preg_split( '/\n{2,}/', $data );

	// The first section is the header, not an actual message
	$headerSection = array_shift( $sections );

	// Only the tags stored in the header's msgstr are of interest here
	$match = self::expectKeyword( 'msgstr', $headerSection );
	if ( $match === null ) {
		throw new MWException( "Gettext file header was not found:\n\n$data" );
	}

	$headerBlock = self::formatForWiki( $match, 'trim' );
	$headers = self::parseHeaderTags( $headerBlock );

	// A fuzzy header switches to pot-mode, where msgid is used as the value
	$flags = self::parseFlags( $headerSection );
	if ( in_array( 'fuzzy', $flags, true ) ) {
		$potmode = true;
	}

	$template = array();
	$messages = array();

	// Copy some headers into the metadata for easier access
	$metadata = array();
	if ( isset( $headers['X-Language-Code'] ) ) {
		$metadata['code'] = $headers['X-Language-Code'];
	}

	if ( isset( $headers['X-Message-Group'] ) ) {
		$metadata['group'] = $headers['X-Message-Group'];
	}

	// Number of plural forms to expect while parsing the sections
	$pluralCount = false;
	$matches = array();
	if ( isset( $headers['Plural-Forms'] ) ) {
		if ( preg_match( '/nplurals=([0-9]+).*;/', $headers['Plural-Forms'], $matches ) ) {
			$pluralCount = $metadata['plural'] = $matches[1];
		}
	}

	foreach ( $sections as $section ) {
		$item = self::parseGettextSection( $section, $pluralCount, $metadata );
		if ( $item === false ) {
			continue;
		}

		if ( $useCtxtAsKey ) {
			// parseGettextSection() always sets 'ctxt' and stores false when
			// the section has no msgctxt, so isset() alone never detects a
			// missing context.
			if ( !isset( $item['ctxt'] ) || $item['ctxt'] === false ) {
				error_log( "ctxt missing for: $section" );
				continue;
			}
			$key = $item['ctxt'];
		} else {
			$key = self::generateKeyFromItem( $item, $keyAlgorithm );
		}

		$key = $mangler->mangle( $key );
		$messages[$key] = $potmode ? $item['id'] : $item['str'];
		$template[$key] = $item;
	}

	return array(
		'MESSAGES' => $messages,
		'TEMPLATE' => $template,
		'METADATA' => $metadata,
		'HEADERS' => $headers
	);
}
00162
/**
 * Parses one section (entry) of a gettext file.
 *
 * @param string $section Text of a single section.
 * @param int|string|bool $pluralCount Number of plural forms to look for in
 *   plural entries; false when unknown.
 * @param array &$metadata Not read or modified by this method.
 * @throws MWException When msgid is missing, or msgstr is missing from a
 *   non-plural entry.
 * @return array|bool False for empty sections and sections containing an
 *   obsolete (#~) line; otherwise an array with the keys ctxt, id, str,
 *   flags and comments.
 */
public static function parseGettextSection( $section, $pluralCount, &$metadata ) {
	if ( trim( $section ) === '' ) {
		return false;
	}

	// Obsolete entries are skipped. Multiline mode is needed because flags
	// or other comment lines may precede the commented-out part.
	if ( preg_match( '/^#~/m', $section ) ) {
		return false;
	}

	$item = array(
		'ctxt' => false,
		'id' => '',
		'str' => '',
		'flags' => array(),
		'comments' => array(),
	);

	$match = self::expectKeyword( 'msgid', $section );
	if ( $match === null ) {
		throw new MWException( "Unable to parse msgid:\n\n$section" );
	}
	$item['id'] = self::formatForWiki( $match );

	$match = self::expectKeyword( 'msgctxt', $section );
	if ( $match !== null ) {
		$item['ctxt'] = self::formatForWiki( $match );
	}

	$match = self::expectKeyword( 'msgid_plural', $section );
	if ( $match !== null ) {
		// Plural entry: fold singular and plural msgid into one wiki construct
		$plural = self::formatForWiki( $match );
		$item['id'] = "{{PLURAL:GETTEXT|{$item['id']}|$plural}}";

		// Keep the translation empty if no form has a translation
		$pluralMessageText = self::processGettextPluralMessage( $pluralCount, $section );
		if ( $pluralMessageText !== '' ) {
			$item['str'] = $pluralMessageText;
		}
	} else {
		$match = self::expectKeyword( 'msgstr', $section );
		if ( $match === null ) {
			throw new MWException( "Unable to parse msgstr:\n\n$section" );
		}
		$item['str'] = self::formatForWiki( $match );
	}

	// The fuzzy flag is moved from the flag list into the translation text
	$flags = self::parseFlags( $section );
	foreach ( $flags as $key => $flag ) {
		if ( $flag === 'fuzzy' ) {
			$item['str'] = TRANSLATE_FUZZY . $item['str'];
			unset( $flags[$key] );
		}
	}
	$item['flags'] = $flags;

	// Remaining comments, grouped by the character that follows '#'
	$matches = array();
	if ( preg_match_all( '/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) {
		foreach ( $matches as $match ) {
			// Flag lines were handled above
			if ( $match[1] === ',' ) {
				continue;
			}

			// The "[Wiki]" marker written by formatDocumentation() is part of
			// the comment text ($match[2]); $match[1] holds at most one
			// character and can never contain it.
			if ( strpos( $match[2], '[Wiki]' ) === 0 ) {
				continue;
			}

			$item['comments'][$match[1]][] = $match[2];
		}
	}

	return $item;
}
00243
/**
 * Collects the msgstr[N] entries of a section into one plural construct.
 *
 * @param int|string|bool $pluralCount Number of forms to look for.
 * @param string $section Text of a single section.
 * @return string "{{PLURAL:GETTEXT|form0|form1|...}}", or the empty string
 *   when every form is empty. Missing forms are logged and treated as empty.
 */
public static function processGettextPluralMessage( $pluralCount, $section ) {
	$forms = array();
	$hasContent = false;

	for ( $n = 0; $n < $pluralCount; $n++ ) {
		$raw = self::expectKeyword( "msgstr\\[$n\\]", $section );

		if ( $raw === null ) {
			$forms[] = '';
			error_log( "Plural $n not found, expecting total of $pluralCount for $section" );
			continue;
		}

		$form = self::formatForWiki( $raw );
		$forms[] = $form;
		if ( strlen( $form ) > 0 ) {
			$hasContent = true;
		}
	}

	if ( !$hasContent ) {
		return '';
	}

	return '{{PLURAL:GETTEXT|' . implode( '|', $forms ) . '}}';
}
00264
/**
 * Extracts the flags from the first "#," line of a section.
 *
 * @param string $section Text of a single section.
 * @return string[] Trimmed flag names; empty array when there is no flag line.
 */
public static function parseFlags( $section ) {
	$found = array();
	if ( !preg_match( '/^#,(.*)$/mu', $section, $found ) ) {
		return array();
	}

	$rawFlags = explode( ',', $found[1] );

	return array_map( 'trim', $rawFlags );
}
00273
/**
 * Finds the quoted text block that follows a keyword at the start of a line.
 *
 * @param string $name Keyword such as msgid, msgstr, msgid_plural or
 *   msgctxt; it is inserted into the regular expression as is.
 * @param string $section Text of a single section.
 * @return string|null The raw block including quotes and line breaks, or
 *   null when the keyword is not found.
 */
public static function expectKeyword( $name, $section ) {
	// One quoted string directly after the keyword, followed by any number
	// of continuation lines consisting of a quoted string.
	$poformat = '".*"\n?(^".*"$\n?)*';
	$pattern = "/^$name\s($poformat)/mx";

	$found = array();

	return preg_match( $pattern, $section, $found ) ? $found[1] : null;
}
00287
/**
 * Generates a message key from the context and id of a parsed item.
 *
 * The key has the form "<hash>-<snippet>": a sha1 hash over context and id,
 * and a snippet of at most 30 units of the id (as truncated by the English
 * Language object) with spaces turned into underscores.
 *
 * @param array $item Parsed item; the keys 'ctxt' and 'id' are read.
 * @param string $algorithm 'simple' shortens the hash to six characters and
 *   uses the id as is; any other value cleans the id with the legacy rules.
 * @return string
 */
public static function generateKeyFromItem( array $item, $algorithm = 'legacy' ) {
	$lang = Language::factory( 'en' );
	$id = $item['id'];

	// An empty-string msgctxt must produce a different key than a missing
	// msgctxt, without changing the keys of messages that have no context.
	$hashInput = $item['ctxt'] === ''
		? $id . 'MSGEMPTYCTXT'
		: $item['ctxt'] . $id;
	$hash = sha1( $hashInput );

	if ( $algorithm === 'simple' ) {
		$hash = substr( $hash, 0, 6 );
		$snippet = $id;
	} else {
		global $wgLegalTitleChars;
		$snippet = preg_replace( "/[^$wgLegalTitleChars]/", ' ', $id );
		$snippet = preg_replace( "/[:&%\/_]/", ' ', $snippet );
		$snippet = preg_replace( "/ {2,}/", ' ', $snippet );
	}

	// Both algorithms finish the snippet the same way
	$snippet = $lang->truncate( $snippet, 30, '' );
	$snippet = str_replace( ' ', '_', trim( $snippet ) );

	return "$hash-$snippet";
}
00323
/**
 * Converts a raw quoted gettext text block into plain text.
 *
 * Removes the surrounding quotes of every line, joins the lines and
 * resolves C-style escape sequences.
 *
 * @param string $data Raw block as returned by expectKeyword().
 * @param string $whitespace What to do when the result ends in whitespace:
 *   'mark' appends a backslash, 'trim' strips the trailing whitespace.
 * @throws MWException For any other $whitespace value, but only when the
 *   result actually ends in whitespace.
 * @return string
 */
public static function formatForWiki( $data, $whitespace = 'mark' ) {
	$unquoted = preg_replace( '/(^"|"$\n?)/m', '', $data );
	$text = stripcslashes( $unquoted );

	// Nothing more to do unless the text ends in whitespace
	if ( !preg_match( '/\s$/', $text ) ) {
		return $text;
	}

	if ( $whitespace === 'mark' ) {
		return $text . '\\';
	}

	if ( $whitespace === 'trim' ) {
		return rtrim( $text );
	}

	throw new MWException( 'Unknown action for whitespace' );
}
00352
/**
 * Parses "Name: value" header lines into an associative array.
 *
 * Lines without a colon are logged and skipped; they cannot be split into
 * a name and a value.
 *
 * @param string $headers Header block, one tag per line.
 * @return array Tag name => tag value, both trimmed.
 */
public static function parseHeaderTags( $headers ) {
	$tags = array();
	foreach ( explode( "\n", $headers ) as $line ) {
		if ( strpos( $line, ':' ) === false ) {
			error_log( __METHOD__ . ": $line" );
			// Without a colon explode() returns a single element, so the
			// list() assignment below would read an undefined offset.
			continue;
		}
		list( $key, $value ) = explode( ':', $line, 2 );
		$tags[trim( $key )] = trim( $value );
	}

	return $tags;
}
00365
/**
 * Produces the gettext file contents for a message collection.
 *
 * Existing files for 'en' and for the language of the collection are read
 * so that their parsed entries can be passed on to formatMessageBlock().
 *
 * @param MessageCollection $collection
 * @return string File header followed by one block per message.
 */
protected function writeReal( MessageCollection $collection ) {
	$pot = $this->read( 'en' );
	$template = $this->read( $collection->code );

	// Filled in by doGettextHeader() through the reference parameter
	$pluralCount = false;
	$output = $this->doGettextHeader( $collection, $template, $pluralCount );

	foreach ( $collection as $key => $m ) {
		$transTemplate = array();
		if ( isset( $template['TEMPLATE'][$key] ) ) {
			$transTemplate = $template['TEMPLATE'][$key];
		}

		$potTemplate = array();
		if ( isset( $pot['TEMPLATE'][$key] ) ) {
			$potTemplate = $pot['TEMPLATE'][$key];
		}

		$output .= $this->formatMessageBlock( $key, $m, $transTemplate, $potTemplate, $pluralCount );
	}

	return $output;
}
00383
/**
 * Builds the comment block and the header entry of the gettext file.
 *
 * @param MessageCollection $collection
 * @param array $template Parsed existing file; its HEADERS entry, when
 *   present, is used as the starting point for the header tags.
 * @param int|string|bool &$pluralCount Receives the nplurals value taken
 *   from the final Plural-Forms header.
 * @return string
 */
protected function doGettextHeader( MessageCollection $collection, $template, &$pluralCount ) {
	global $wgSitename;

	$code = $collection->code;
	$name = TranslateUtils::getLanguageName( $code );
	$native = TranslateUtils::getLanguageName( $code, $code );
	$authors = $this->doAuthors( $collection );
	$extra = isset( $this->extra['header'] )
		? "# --\n" . $this->extra['header']
		: '';

	$output = "# Translation of {$this->group->getLabel()} to $name ($native)\n"
		. "# Exported from $wgSitename\n"
		. "#\n"
		. $authors . $extra;

	// Make sure there is no empty line before msgid
	$output = trim( $output ) . "\n";

	// The order of the assignments below determines the order of new tags
	$specs = array();
	if ( isset( $template['HEADERS'] ) ) {
		$specs = $template['HEADERS'];
	}

	$timestamp = wfTimestampNow();
	$specs['PO-Revision-Date'] = self::formatTime( $timestamp );
	if ( $this->offlineMode ) {
		$specs['POT-Creation-Date'] = self::formatTime( $timestamp );
	} elseif ( $this->group instanceof MessageGroupBase ) {
		$specs['X-POT-Import-Date'] = self::formatTime( wfTimestamp( TS_MW, $this->getPotTime() ) );
	}
	$specs['Content-Type'] = 'text/plain; charset=UTF-8';
	$specs['Content-Transfer-Encoding'] = '8bit';
	$specs['Language'] = wfBCP47( $this->group->mapCode( $code ) );
	wfRunHooks( 'Translate:GettextFFS:headerFields', array( &$specs, $this->group, $code ) );
	$specs['X-Generator'] = $this->getGenerator();

	if ( $this->offlineMode ) {
		$specs['X-Language-Code'] = $code;
		$specs['X-Message-Group'] = $this->group->getId();
	}

	// A known rule for the language wins over whatever the file had;
	// the two-form default is only used when neither exists.
	$plural = self::getPluralRule( $code );
	if ( $plural ) {
		$specs['Plural-Forms'] = $plural;
	} elseif ( !isset( $specs['Plural-Forms'] ) ) {
		$specs['Plural-Forms'] = 'nplurals=2; plural=(n != 1);';
	}

	$countMatch = array();
	preg_match( '/nplurals=(\d+);/', $specs['Plural-Forms'], $countMatch );
	$pluralCount = $countMatch[1];

	// Header entry: empty msgid, tags as a multi-line msgstr
	$output .= "msgid \"\"\nmsgstr \"\"\n\"\"\n";

	foreach ( $specs as $tag => $tagValue ) {
		$output .= self::escape( "$tag: $tagValue\n" ) . "\n";
	}

	return $output . "\n";
}
00450
/**
 * Formats the authors of a collection as "# Author: name" comment lines.
 *
 * @param MessageCollection $collection
 * @return string One line per author that passes filterAuthors(); empty
 *   string when there are none.
 */
protected function doAuthors( MessageCollection $collection ) {
	$names = $this->filterAuthors( $collection->getAuthors(), $collection->code );

	$lines = array();
	foreach ( $names as $name ) {
		$lines[] = "# Author: $name\n";
	}

	return implode( '', $lines );
}
00462
/**
 * Formats a single message as a gettext entry.
 *
 * @param string $key Message key.
 * @param object $m Message; getTags(), definition() and translation() are
 *   called on it.
 * @param array $trans Parsed entry from the existing translation file.
 * @param array $pot Parsed entry from the existing template file; takes
 *   precedence over $trans for comments, flags and context.
 * @param int|string $pluralCount Number of plural forms to write.
 * @return string Comment lines followed by the entry and an empty line.
 */
protected function formatMessageBlock( $key, $m, $trans, $pot, $pluralCount ) {
	// Comment lines: wiki documentation first, then carried-over comments
	$header = $this->formatDocumentation( $key );

	$carried = self::chainGetter( 'comments', $pot, $trans, array() );
	foreach ( $carried as $type => $lines ) {
		foreach ( $lines as $text ) {
			$header .= "#$type $text\n";
		}
	}

	$flags = self::chainGetter( 'flags', $pot, $trans, array() );
	$flags = array_merge( $m->getTags(), $flags );

	// In offline mode the message key itself is stored as the context
	$content = '';
	if ( $this->offlineMode ) {
		$content .= 'msgctxt ' . self::escape( $key ) . "\n";
	} else {
		$ctxt = self::chainGetter( 'ctxt', $pot, $trans, false );
		if ( $ctxt !== false ) {
			$content .= 'msgctxt ' . self::escape( $ctxt ) . "\n";
		}
	}

	$msgid = $m->definition();
	$msgstr = $m->translation();

	// The fuzzy marker in the text becomes a flag in the file
	if ( strpos( $msgstr, TRANSLATE_FUZZY ) !== false ) {
		$msgstr = str_replace( TRANSLATE_FUZZY, '', $msgstr );
		$flags[] = 'fuzzy';
	}

	if ( !preg_match( '/{{PLURAL:GETTEXT/i', $msgid ) ) {
		$content .= 'msgid ' . self::escape( $msgid ) . "\n";
		$content .= 'msgstr ' . self::escape( $msgstr ) . "\n";
	} else {
		$idForms = $this->splitPlural( $msgid, 2 );
		$content .= 'msgid ' . $this->escape( $idForms[0] ) . "\n";
		$content .= 'msgid_plural ' . $this->escape( $idForms[1] ) . "\n";

		try {
			$strForms = $this->splitPlural( $msgstr, $pluralCount );
			foreach ( $strForms as $index => $form ) {
				$content .= "msgstr[$index] " . $this->escape( $form ) . "\n";
			}
		} catch ( GettextPluralException $e ) {
			// Translation has no usable plural: write empty forms and flag it
			$flags[] = 'invalid-plural';
			for ( $n = 0; $n < $pluralCount; $n++ ) {
				$content .= "msgstr[$n] \"\"\n";
			}
		}
	}

	if ( $flags ) {
		sort( $flags );
		$header .= "#, " . implode( ', ', array_unique( $flags ) ) . "\n";
	}

	// An entry without any comment lines gets a bare "#" line
	if ( !$header ) {
		$header = "#\n";
	}

	return $header . $content . "\n";
}
00525
/**
 * Returns the value for a key from the first array in which it is set.
 *
 * A key whose value is null counts as not set, because isset() is used.
 *
 * @param string $key
 * @param array $a Checked first.
 * @param array $b Checked second.
 * @param mixed $default Returned when the key is set in neither array.
 * @return mixed
 */
protected static function chainGetter( $key, $a, $b, $default ) {
	foreach ( array( $a, $b ) as $source ) {
		if ( isset( $source[$key] ) ) {
			return $source[$key];
		}
	}

	return $default;
}
00535
/**
 * Formats a timestamp for use in gettext header tags.
 *
 * @param string $time Timestamp accepted by Language::sprintfDate().
 * @return string Output of sprintfDate() for the pattern below, which ends
 *   in a fixed "+0000" offset.
 */
protected static function formatTime( $time ) {
	$pattern = 'xnY-xnm-xnd xnH:xni:xns+0000';

	return Language::factory( 'en' )->sprintfDate( $pattern, $time );
}
00541
/**
 * Returns the timestamp of the message group cache.
 *
 * @return string Cache timestamp, or the current time when the cache does
 *   not exist.
 */
protected function getPotTime() {
	$cache = new MessageGroupCache( $this->group );

	if ( $cache->exists() ) {
		return $cache->getTimestamp();
	}

	return wfTimestampNow();
}
00547
/**
 * Returns the value for the X-Generator header tag.
 *
 * @return string "MediaWiki <version>; Translate <version>"
 */
protected function getGenerator() {
	$mediaWiki = 'MediaWiki ' . SpecialVersion::getVersion();
	$translate = "; Translate " . TRANSLATE_VERSION;

	return $mediaWiki . $translate;
}
00552
/**
 * Formats the wiki documentation of a message as "#. [Wiki]" comment lines.
 *
 * @param string $key Message key.
 * @return string Comment lines, or the empty string when offline mode is
 *   off, no documentation language is configured, or the documentation
 *   content is not a string.
 */
protected function formatDocumentation( $key ) {
	global $wgTranslateDocumentationLanguageCode;

	if ( !$this->offlineMode ) {
		return '';
	}

	$docCode = $wgTranslateDocumentationLanguageCode;
	if ( !$docCode ) {
		return '';
	}

	$documentation = TranslateUtils::getMessageContent( $key, $docCode, $this->group->getNamespace() );
	if ( !is_string( $documentation ) ) {
		return '';
	}

	// explode() always yields at least one line, so prefixing the first
	// line and gluing the rest gives every line the same prefix.
	$prefix = '#. [Wiki] ';
	$lines = explode( "\n", $documentation );

	return $prefix . implode( "\n" . $prefix, $lines ) . "\n";
}
00578
/**
 * Turns a text into a quoted single-line gettext string.
 *
 * @param string $line
 * @return string The text in double quotes, with backslashes and double
 *   quotes escaped and line breaks written as \n.
 */
protected static function escape( $line ) {
	// A trailing backslash after whitespace is the marker added by
	// formatForWiki() to protect trailing whitespace; drop the marker.
	$unmarked = preg_replace( '/(\s)\\\\$/', '\1', $line );

	$escaped = str_replace( "\n", '\n', addcslashes( $unmarked, '\\"' ) );

	return '"' . $escaped . '"';
}
00588
/**
 * Looks up the gettext plural rule for a language code.
 *
 * The rules are read from data/plural-gettext.txt, which holds one
 * "code<TAB>rule" pair per line.
 *
 * @param string $code Language code.
 * @return string The rule, or the empty string when the code is not listed.
 */
public static function getPluralRule( $code ) {
	$contents = file_get_contents( __DIR__ . '/../data/plural-gettext.txt' );
	$lines = explode( "\n", $contents );

	foreach ( $lines as $entry ) {
		if ( trim( $entry ) !== '' ) {
			list( $entryCode, $entryRule ) = explode( "\t", $entry );
			if ( $entryCode === $code ) {
				return $entryRule;
			}
		}
	}

	return '';
}
00609
/**
 * Splits a text containing {{PLURAL:GETTEXT|...}} constructs into one text
 * per plural form.
 *
 * For form N, every plural construct in the text is replaced with its N-th
 * alternative while the surrounding text is kept. If any construct has no
 * N-th alternative, that form becomes the empty string.
 *
 * @param string $text
 * @param int|string $forms Number of plural forms to produce.
 * @throws GettextPluralException When the text contains no plural construct.
 * @return string[]|string List of texts indexed by form number.
 *   NOTE(review): for the integer 1 the text is returned unchanged as a
 *   string; this is kept as is for compatibility. The callers in this file
 *   pass either the integer 2 or a numeric string.
 */
protected function splitPlural( $text, $forms ) {
	if ( $forms === 1 ) {
		return $text;
	}

	$placeholder = TranslateUtils::getPlaceholder();
	# |/| is commonly used in KDE to support inflections
	$text = str_replace( '|/|', $placeholder, $text );

	$plurals = array();
	$match = preg_match_all( '/{{PLURAL:GETTEXT\|(.*)}}/iUs', $text, $plurals );
	if ( !$match ) {
		throw new GettextPluralException( "Failed to find plural in: $text" );
	}

	$splitPlurals = array();
	for ( $i = 0; $i < $forms; $i++ ) {
		# Start with the whole string
		$pluralForm = $text;
		# Loop over *each* {{PLURAL}} instance and replace
		# it with the plural form belonging to this index
		foreach ( $plurals[0] as $index => $definition ) {
			$parsedFormsArray = explode( '|', $plurals[1][$index] );
			if ( !isset( $parsedFormsArray[$i] ) ) {
				error_log( "Too few plural forms in: $text" );
				$pluralForm = '';
				# The form is unusable; further replacements cannot change that
				break;
			}

			# str_replace() takes search, replacement, subject: look for the
			# whole construct in the text and put the selected form there
			$pluralForm = str_replace( $definition, $parsedFormsArray[$i], $pluralForm );
		}

		$pluralForm = str_replace( $placeholder, '|/|', $pluralForm );
		$splitPlurals[$i] = $pluralForm;
	}

	return $splitPlurals;
}
00647 }