DatabaseTTMServer.php

Go to the documentation of this file.
00001 <?php
/**
 * Translation memory server that stores its data in database tables.
 *
 * Three tables are used: translate_tms holds the source texts (definitions),
 * translate_tmt holds the translations attached to a source id, and
 * translate_tmf holds a reduced word list per source used for fulltext
 * matching.
 */
class DatabaseTTMServer extends TTMServer implements WritableTTMServer, ReadableTTMServer {
    /**
     * Source ids of the definitions inserted during the current batch,
     * indexed by the same keys the batch arrays use.
     * @var array
     */
    protected $sids;

    /**
     * Returns a connection to the database named in $this->config['database'].
     *
     * @param int $mode DB_SLAVE for reading (default) or DB_MASTER for writing.
     * @return mixed Whatever wfGetDB() returns for the 'ttmserver' group.
     */
    protected function getDB( $mode = DB_SLAVE ) {
        return wfGetDB( $mode, 'ttmserver', $this->config['database'] );
    }

    /**
     * Stores, replaces or removes the translation of one message.
     *
     * @param MessageHandle $handle Handle of the translated message.
     * @param string|null $targetText New translation; null only removes the
     *   stored translation for the handle's language.
     * @return bool False when the handle is unusable, when it points to the
     *   source language, or when the definition is missing or blank;
     *   true once the tables have been updated.
     */
    public function update( MessageHandle $handle, $targetText ) {
        if ( !$handle->isValid() || $handle->getCode() === '' ) {
            return false;
        }

        $mkey = $handle->getKey();
        $group = $handle->getGroup();
        $targetLanguage = $handle->getCode();
        $sourceLanguage = $group->getSourceLanguage();

        // Skip definitions to not slow down mass imports etc.
        // These will be added when the first translation is made
        if ( $targetLanguage === $sourceLanguage ) {
            return false;
        }

        $definition = $group->getMessage( $mkey, $sourceLanguage );
        if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
            return false;
        }

        $context = Title::makeTitle( $handle->getTitle()->getNamespace(), $mkey );
        $dbw = $this->getDB( DB_MASTER );
        /* Check that the definition exists and fetch the sid. If not, add
         * the definition and retrieve the sid. If the definition changes,
         * we will create a new entry - otherwise we could at some point
         * get suggestions which do not match the original definition any
         * longer. The old translations are still kept until purged by
         * rerunning the bootstrap script. */
        $conds = array(
            'tms_context' => $context->getPrefixedText(),
            'tms_text' => $definition,
        );

        $sid = $dbw->selectField( 'translate_tms', 'tms_sid', $conds, __METHOD__ );
        if ( $sid === false ) {
            $sid = $this->insertSource( $context, $sourceLanguage, $definition );
        }

        // Delete old translations for this message if any. Could also use replace
        $deleteConds = array(
            'tmt_sid' => $sid,
            'tmt_lang' => $targetLanguage,
        );
        $dbw->delete( 'translate_tmt', $deleteConds, __METHOD__ );

        // Insert the new translation
        if ( $targetText !== null ) {
            // The delete conditions double as the identifying columns of the new row.
            $row = $deleteConds + array(
                'tmt_text' => $targetText,
            );

            $dbw->insert( 'translate_tmt', $row, __METHOD__ );
        }

        return true;
    }

    /**
     * Inserts a source text into translate_tms and, when enough usable
     * words are found, its fulltext word list into translate_tmf.
     *
     * @param Title $context Title whose prefixed text identifies the message.
     * @param string $sourceLanguage Language code of $text.
     * @param string $text The source text.
     * @return mixed The id reported by the database for the inserted source row.
     */
    protected function insertSource( Title $context, $sourceLanguage, $text ) {
        wfProfileIn( __METHOD__ );
        $row = array(
            'tms_lang' => $sourceLanguage,
            // Length in characters; query() filters candidates on this column.
            'tms_len' => mb_strlen( $text ),
            'tms_text' => $text,
            'tms_context' => $context->getPrefixedText(),
        );

        $dbw = $this->getDB( DB_MASTER );
        $dbw->insert( 'translate_tms', $row, __METHOD__ );
        $sid = $dbw->insertId();

        $fulltext = $this->filterForFulltext( $sourceLanguage, $text );
        if ( count( $fulltext ) ) {
            $row = array(
                'tmf_sid' => $sid,
                'tmf_text' => implode( ' ', $fulltext ),
            );
            $dbw->insert( 'translate_tmf', $row, __METHOD__ );
        }

        wfProfileOut( __METHOD__ );

        return $sid;
    }

    /**
     * Reduces a text to a short list of words for the fulltext table.
     *
     * Non-alphanumeric characters are turned into spaces, the text is
     * segmented and lowercased by the language object and then split on
     * whitespace. Words shorter than 4 or longer than 15 bytes are dropped,
     * duplicates are removed and at most 10 words are kept.
     *
     * @param string $language Language code of $input.
     * @param string $input Text to filter.
     * @return array List of words; empty when the text has fewer than four
     *   words before the length filtering.
     */
    protected function filterForFulltext( $language, $input ) {
        wfProfileIn( __METHOD__ );
        $lang = Language::factory( $language );

        $text = preg_replace( '/[^[:alnum:]]/u', ' ', $input );
        $text = $lang->segmentByWord( $text );
        $text = $lang->lc( $text );
        $segments = preg_split( '/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY );
        if ( count( $segments ) < 4 ) {
            wfProfileOut( __METHOD__ );

            return array();
        }

        foreach ( $segments as $i => $segment ) {
            // Yes strlen: the limits are deliberately measured in bytes.
            $len = strlen( $segment );
            if ( $len < 4 || $len > 15 ) {
                unset( $segments[$i] );
            }
        }

        $segments = array_unique( $segments );
        // array_slice() also renumbers the keys left sparse by unset() above.
        $segments = array_slice( $segments, 0, 10 );
        wfProfileOut( __METHOD__ );

        return $segments;
    }

    /**
     * Empties all three translation memory tables and drops the fulltext
     * index so that it does not have to be maintained during bulk inserts.
     */
    public function beginBootstrap() {
        $dbw = $this->getDB( DB_MASTER );
        $dbw->delete( 'translate_tms', '*', __METHOD__ );
        $dbw->delete( 'translate_tmt', '*', __METHOD__ );
        $dbw->delete( 'translate_tmf', '*', __METHOD__ );
        $table = $dbw->tableName( 'translate_tmf' );
        try {
            $dbw->query( "DROP INDEX tmf_text ON $table", __METHOD__ );
        } catch ( DBQueryError $e ) {
            // Perhaps the script was aborted before it got
            // chance to add the index back.
        }
    }

    /**
     * Starts a new batch by forgetting the source ids of the previous one.
     */
    public function beginBatch() {
        $this->sids = array();
    }

    /**
     * Inserts the definitions of a batch and remembers their source ids.
     *
     * @param array $batch Map of batch key to array( title, language, text ).
     */
    public function batchInsertDefinitions( array $batch ) {
        foreach ( $batch as $key => $item ) {
            list( $title, $language, $text ) = $item;
            $handle = new MessageHandle( $title );
            $context = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
            $this->sids[$key] = $this->insertSource( $context, $language, $text );
        }
        wfWaitForSlaves( 10 );
    }

    /**
     * Inserts the translations of a batch in a single insert call.
     *
     * Each entry is attached to the source id recorded under the same key
     * by batchInsertDefinitions().
     *
     * @param array $batch Map of batch key to array( title, language, text );
     *   the title element is not used here.
     */
    public function batchInsertTranslations( array $batch ) {
        $rows = array();
        foreach ( $batch as $key => $data ) {
            list( , $language, $text ) = $data;
            $rows[] = array(
                'tmt_sid' => $this->sids[$key],
                'tmt_lang' => $language,
                'tmt_text' => $text,
            );
        }

        $dbw = $this->getDB( DB_MASTER );
        $dbw->insert( 'translate_tmt', $rows, __METHOD__ );
        wfWaitForSlaves( 10 );
    }

    /**
     * Nothing needs to be done at the end of a batch.
     */
    public function endBatch() {
    }

    /**
     * Recreates the fulltext index dropped by beginBootstrap().
     */
    public function endBootstrap() {
        $dbw = $this->getDB( DB_MASTER );
        $table = $dbw->tableName( 'translate_tmf' );
        $dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)", __METHOD__ );
    }

    /* Reading interface */

    /**
     * Suggestions of this server are always reported as local.
     *
     * @param array $suggestion Unused.
     * @return bool Always true.
     */
    public function isLocalSuggestion( array $suggestion ) {
        return true;
    }

    /**
     * Turns the 'location' of a suggestion into a canonical URL.
     *
     * @param array $suggestion Suggestion with a 'location' page name.
     * @return string Canonical URL of the page, or an empty string when the
     *   location cannot be parsed as a title.
     */
    public function expandLocation( array $suggestion ) {
        $title = Title::newFromText( $suggestion['location'] );
        if ( !$title ) {
            // No title object could be built; avoid a fatal method call on null.
            return '';
        }

        return $title->getCanonicalUrl();
    }

    /**
     * Looks up translation suggestions for a text.
     *
     * @param string $sourceLanguage Language code of $text.
     * @param string $targetLanguage Language code of the wanted translations.
     * @param string $text Text to find suggestions for.
     * @return array Suggestions as produced by processQueryResults().
     */
    public function query( $sourceLanguage, $targetLanguage, $text ) {
        wfProfileIn( __METHOD__ );
        // Calculate the bounds of the string length which are able
        // to satisfy the cutoff percentage in edit distance.
        $len = mb_strlen( $text );
        // Cast to int so that only plain integers end up in the SQL below.
        $min = (int)ceil( max( $len * $this->config['cutoff'], 2 ) );
        $max = (int)floor( $len / $this->config['cutoff'] );

        // We could use fulltext index to narrow the results further
        $dbr = $this->getDB( DB_SLAVE );
        $tables = array( 'translate_tmt', 'translate_tms' );
        $fields = array( 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' );

        $conds = array(
            'tms_lang' => $sourceLanguage,
            'tmt_lang' => $targetLanguage,
            "tms_len BETWEEN $min AND $max",
            'tms_sid = tmt_sid',
        );

        $fulltext = $this->filterForFulltext( $sourceLanguage, $text );
        if ( $fulltext ) {
            $tables[] = 'translate_tmf';
            // Quote through the database layer instead of relying on the
            // word filter to have removed every dangerous character.
            $list = $dbr->addQuotes( implode( ' ', $fulltext ) );
            $conds[] = 'tmf_sid = tmt_sid';
            $conds[] = "MATCH(tmf_text) AGAINST( $list )";
        }

        $res = $dbr->select( $tables, $fields, $conds, __METHOD__ );
        wfProfileOut( __METHOD__ );

        return $this->processQueryResults( $res, $text, $targetLanguage );
    }

    /**
     * Scores the candidate rows against the queried text and keeps those
     * that reach the configured cutoff.
     *
     * The quality of a row is 1 - 0.9 * distance / length, where length is
     * the shorter of the two texts in characters. Processing stops after
     * five seconds; rows not yet seen are then ignored.
     *
     * @param mixed $res Iterable result with tms_text, tmt_text and
     *   tms_context fields (tms_wiki is used when present).
     * @param string $text The queried text.
     * @param string $targetLanguage Language code appended to the location.
     * @return array Suggestions ordered by TTMServer::sortSuggestions().
     */
    protected function processQueryResults( $res, $text, $targetLanguage ) {
        wfProfileIn( __METHOD__ );
        $timeLimit = microtime( true ) + 5;

        $lenA = mb_strlen( $text );
        $results = array();
        foreach ( $res as $row ) {
            if ( microtime( true ) > $timeLimit ) {
                // Having no suggestions is better than preventing translation
                // altogether by timing out the request :(
                break;
            }

            $a = $text;
            $b = $row->tms_text;
            $lenB = mb_strlen( $b );
            $len = min( $lenA, $lenB );
            if ( $len === 0 ) {
                // An empty string cannot be scored: the quality formula
                // below would divide by zero.
                continue;
            }

            if ( $len > 600 ) {
                // two strings of length 1500 ~ 10s
                // two strings of length 2250 ~ 30s
                $dist = $len;
            } else {
                $dist = self::levenshtein( $a, $b, $lenA, $lenB );
            }
            $quality = 1 - ( $dist * 0.9 / $len );

            if ( $quality >= $this->config['cutoff'] ) {
                $results[] = array(
                    'source' => $row->tms_text,
                    'target' => $row->tmt_text,
                    'context' => $row->tms_context,
                    'location' => $row->tms_context . '/' . $targetLanguage,
                    'quality' => $quality,
                    'wiki' => isset( $row->tms_wiki ) ? $row->tms_wiki : wfWikiId(),
                );
            }
        }
        $results = TTMServer::sortSuggestions( $results );
        wfProfileOut( __METHOD__ );

        return $results;
    }
}
Generated on Tue Oct 29 00:00:25 2013 for MediaWiki Translate Extension by  doxygen 1.6.3