PoStreamReader.php

Definition of Drupal\Component\Gettext\PoStreamReader.

Namespace

Drupal\Component\Gettext

File

drupal/core/lib/Drupal/Component/Gettext/PoStreamReader.php
View source
<?php

/**
 * @file
 * Definition of Drupal\Component\Gettext\PoStreamReader.
 */
namespace Drupal\Component\Gettext;

use Drupal\Component\Gettext\PoReaderInterface;
use Drupal\Component\Gettext\PoStreamInterface;
use Drupal\Component\Gettext\PoHeader;

/**
 * Implements Gettext PO stream reader.
 *
 * The PO file format parsing is implemented according to the documentation at
 * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
 */
class PoStreamReader implements PoStreamInterface, PoReaderInterface {

  /**
   * Source line number of the stream being parsed.
   *
   * Starts at 0 and is incremented once for every line successfully read, so
   * while a line is being processed it holds that line's 1-based number. Used
   * for error reporting.
   *
   * @var int
   */
  private $_line_number = 0;

  /**
   * Parser context for the stream reader state machine.
   *
   * Names the kind of token that was parsed last. Possible contexts are:
   *  - 'COMMENT' (#): also the initial state and the state after an item has
   *    been stored.
   *  - 'MSGID' (msgid)
   *  - 'MSGID_PLURAL' (msgid_plural)
   *  - 'MSGCTXT' (msgctxt)
   *  - 'MSGSTR' (msgstr)
   *  - 'MSGSTR_ARR' (msgstr[N], a plural variant)
   *
   * @var string
   */
  private $_context = 'COMMENT';

  /**
   * Current entry being read. Incomplete.
   *
   * Raw parsed data keyed by '#' (list of comment lines), 'msgctxt', 'msgid'
   * (string, or array once a msgid_plural was seen) and 'msgstr' (string, or
   * array keyed by plural index).
   *
   * @var array
   */
  private $_current_item = array();

  /**
   * Current plural index for plural translations.
   *
   * Set from the number between the brackets of the last 'msgstr[N]' line, so
   * that continuation lines are appended to the right variant.
   *
   * @var int
   */
  private $_current_plural_index = 0;

  /**
   * URI of the PO stream that is being read.
   *
   * @var string
   */
  private $_uri = '';

  /**
   * Language code for the PO stream being read.
   *
   * NULL until setLangcode() is called; copied onto every item produced.
   *
   * @var string
   */
  private $_langcode = NULL;

  /**
   * Size of the current PO stream.
   *
   * Assigned in open(); not read anywhere else in this class.
   *
   * @var int
   */
  private $_size;

  /**
   * File handle of the current PO stream.
   *
   * Assigned in open(); has no value before that.
   *
   * @var resource
   */
  private $_fd;

  /**
   * The PO stream header.
   *
   * Built from the first item of the stream while opening it. Stays unset
   * when the stream yields no item at all (for example an empty file).
   *
   * @var \Drupal\Component\Gettext\PoHeader
   */
  private $_header;

  /**
   * Object wrapper for the last read source/translation pair.
   *
   * Reset to NULL at the start of every readItem() call and filled in by
   * setItemFromArray() once an entry is complete.
   *
   * @var \Drupal\Component\Gettext\PoItem
   */
  private $_last_item;

  /**
   * Indicator of whether the stream reading is finished.
   *
   * Has no declared default; it is (re)assigned on every line read and becomes
   * TRUE once no further line could be read from the stream.
   *
   * @var bool
   */
  private $_finished;

  /**
   * Array of translated error strings recorded on reading this stream so far.
   *
   * Has no declared default; it only becomes an array when the first error is
   * appended.
   *
   * @var array
   */
  private $_errors;

  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::getLangcode().
   *
   * @return string|null
   *   The language code last passed to setLangcode(), or NULL if none was set.
   */
  public function getLangcode() {
    return $this->_langcode;
  }

  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::setLangcode().
   *
   * The value is stored without validation and is stamped onto every item
   * created afterwards by setItemFromArray().
   *
   * @param string $langcode
   *   Language code to associate with the items read from this stream.
   */
  public function setLangcode($langcode) {
    $this->_langcode = $langcode;
  }

  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::getHeader().
   *
   * @return \Drupal\Component\Gettext\PoHeader|null
   *   The header parsed from the first item of the stream, or NULL when no
   *   header has been read (stream not opened yet, or it contained no item).
   */
  public function getHeader() {
    return $this->_header;
  }

  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
   *
   * Not applicable to stream reading and therefore not implemented: the
   * passed header is ignored and the header read from the stream is kept.
   *
   * @param \Drupal\Component\Gettext\PoHeader $header
   *   Unused.
   */
  public function setHeader(PoHeader $header) {
    // Intentionally empty, see the method documentation.
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::getURI().
   *
   * @return string
   *   The URI last passed to setURI(), or an empty string if none was set.
   */
  public function getURI() {
    return $this->_uri;
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::setURI().
   *
   * Only records the URI; the stream itself is opened by open().
   *
   * @param string $uri
   *   URI of the PO stream to read.
   */
  public function setURI($uri) {
    $this->_uri = $uri;
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::open().
   *
   * Opens the stream and reads the header. The stream is ready for reading
   * items after.
   *
   * @throws Exception
   *   If the URI is not yet set, or if the stream cannot be opened for
   *   reading.
   */
  public function open() {
    if (empty($this->_uri)) {
      throw new \Exception('Cannot open stream without URI set.');
    }

    // fopen() returns FALSE on failure; stop here instead of handing an
    // invalid handle to every later read.
    $fd = fopen($this->_uri, 'rb');
    if ($fd === FALSE) {
      throw new \Exception(sprintf('Cannot open stream %s for reading.', $this->_uri));
    }
    $this->_fd = $fd;

    // The pointer position right after opening is always 0, so the size has
    // to come from the stream statistics. Not every stream wrapper can report
    // them, in which case the size falls back to 0.
    $stat = fstat($fd);
    $this->_size = ($stat !== FALSE && isset($stat['size'])) ? $stat['size'] : 0;

    $this->readHeader();
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::close().
   *
   * @throws Exception
   *   If the stream is not open (never opened, or already closed).
   */
  public function close() {
    if (!$this->_fd) {
      throw new \Exception('Cannot close stream that is not open.');
    }
    fclose($this->_fd);

    // A closed resource still evaluates to TRUE, so drop the handle to make a
    // repeated close() raise the documented exception instead of calling
    // fclose() on a dead handle.
    $this->_fd = NULL;
  }

  /**
   * Implements Drupal\Component\Gettext\PoReaderInterface::readItem().
   *
   * Consumes the stream line by line until one complete entry has been
   * assembled or no more lines are available.
   *
   * @return \Drupal\Component\Gettext\PoItem|null
   *   The next item of the stream, or NULL when the stream ended without
   *   producing another item.
   */
  public function readItem() {
    // Forget whatever the previous call produced.
    $this->_last_item = NULL;

    while (TRUE) {
      // Stop once the stream is exhausted or a line completed an item.
      if ($this->_finished || $this->_last_item !== NULL) {
        break;
      }
      $this->readLine();
    }

    return $this->_last_item;
  }

  /**
   * Sets the seek position for the current PO stream.
   *
   * Only the file pointer is moved; the parser state kept in this object
   * (context, partially read item, line number) is left as it is. The result
   * of fseek() is not checked.
   *
   * @param int $seek
   *   The new seek position to set, passed to fseek() as the offset.
   */
  public function setSeek($seek) {
    fseek($this->_fd, $seek);
  }

  /**
   * Returns the pointer position of the current PO stream.
   *
   * @return int|false
   *   The position reported by ftell() for the stream handle.
   */
  public function getSeek() {
    return ftell($this->_fd);
  }

  /**
   * Reads the header from the PO stream.
   *
   * In a PO file the header is stored like any other entry: an empty source
   * string whose translation holds the key-value pairs. The regular item
   * reader is therefore used to fetch it, and the translation of the first
   * item is handed to a PoHeader object for parsing.
   */
  private function readHeader() {
    $first_item = $this->readItem();

    // Only build a header when the stream produced an item at all; an empty
    // (0 bytes) .po file yields nothing and leaves the header unset.
    if ($first_item) {
      $po_header = new PoHeader();
      $raw_header = trim($first_item->getTranslation());
      $po_header->setFromString($raw_header);
      $this->_header = $po_header;
    }
  }

  /**
   * Reads a line from the PO stream and stores data internally.
   *
   * Expands $this->_current_item based on new data for the current item. If
   * this line ends the current item, it is saved with setItemFromArray() with
   * data from $this->_current_item.
   *
   * An internal state machine is maintained in this reader using
   * $this->_context as the reading state. PO items are either separated by
   * COMMENT states (when there is at least one empty line or comment between
   * them) or follow each other directly, which is indicated by MSGSTR or
   * MSGSTR_ARR followed immediately by an MSGID or MSGCTXT.
   *
   * Reaching the end of the stream is handled like an empty line: a pending
   * item is stored, and a stream ending in the middle of an item is recorded
   * as an error.
   *
   * @return bool|null
   *   FALSE if an error was logged, NULL otherwise. The errors are considered
   *   non-blocking, so reading can continue, while the errors are collected
   *   for later presentation.
   */
  private function readLine() {
    // Prepare the error logging helpers before anything else. They are also
    // needed when the stream ends unexpectedly, in which case no line is read
    // and the number of the last line read is reported.
    $t = get_t();
    $log_vars = array(
      '%uri' => $this->getURI(),
      '%line' => $this->_line_number,
    );

    // Read a line and set the stream finished indicator if it was not
    // possible anymore.
    $line = fgets($this->_fd);
    $this->_finished = $line === FALSE;
    if (!$this->_finished) {
      if ($this->_line_number == 0) {
        // The first line might come with a UTF-8 BOM, which should be removed.
        // The bytes are spelled out because a literal BOM is invisible in the
        // source and easily lost.
        $line = str_replace("\xEF\xBB\xBF", '', $line);

        // Current plurality for 'msgstr[]'.
        $this->_current_plural_index = 0;
      }

      // Track the line number for error reporting.
      $this->_line_number++;
      $log_vars['%line'] = $this->_line_number;

      // Trim away the linefeed. \\n might appear at the end of the string if
      // another line continuing the same string follows. We can remove that.
      $line = trim(strtr($line, array(
        "\\\n" => "",
      )));
      if (!strncmp('#', $line, 1)) {
        // Lines starting with '#' are comments.
        if ($this->_context == 'COMMENT') {
          // Already in comment context, add to current comment.
          $this->_current_item['#'][] = substr($line, 1);
        }
        elseif ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);

          // Start a new entry for the comment.
          $this->_current_item = array();
          $this->_current_item['#'][] = substr($line, 1);
          $this->_context = 'COMMENT';
          return;
        }
        else {
          // A comment following any other context is a syntax error.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
      elseif (!strncmp('msgid_plural', $line, 12)) {
        // A plural form for the current source string. This test has to come
        // before the plain 'msgid' test, which would match this line as well.
        if ($this->_context != 'MSGID') {
          // A plural form can only be added to an msgid directly.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid_plural' and trim away whitespace.
        $line = trim(substr($line, 12));

        // Only the plural source string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The plural form must be wrapped in quotes.
          $this->_errors[] = $t('The translation stream %uri contains a syntax error on line %line.', $log_vars);
          return FALSE;
        }

        // Append the plural source to the current entry.
        if (is_string($this->_current_item['msgid'])) {
          // The first value was stored as string. Now we know the context is
          // plural, it is converted to array.
          $this->_current_item['msgid'] = array(
            $this->_current_item['msgid'],
          );
        }
        $this->_current_item['msgid'][] = $quoted;
        $this->_context = 'MSGID_PLURAL';
        return;
      }
      elseif (!strncmp('msgid', $line, 5)) {
        // Starting a new message.
        if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);

          // Start a new context for the msgid.
          $this->_current_item = array();
        }
        elseif ($this->_context == 'MSGID') {
          // We are currently already in the context, meaning we passed an id
          // with no data.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid' and trim away whitespace.
        $line = trim(substr($line, 5));

        // Only the message id string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The message id must be wrapped in quotes.
          $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
          return FALSE;
        }
        $this->_current_item['msgid'] = $quoted;
        $this->_context = 'MSGID';
        return;
      }
      elseif (!strncmp('msgctxt', $line, 7)) {
        // Starting a new context.
        if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);
          $this->_current_item = array();
        }
        elseif (!empty($this->_current_item['msgctxt'])) {
          // A context cannot apply to another context.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgctxt' and trim away whitespaces.
        $line = trim(substr($line, 7));

        // Only the msgctxt string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The context string must be quoted.
          $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
          return FALSE;
        }
        $this->_current_item['msgctxt'] = $quoted;
        $this->_context = 'MSGCTXT';
        return;
      }
      elseif (!strncmp('msgstr[', $line, 7)) {
        // A message string for a specific plurality. This test has to come
        // before the plain 'msgstr' test, which would match this line as well.
        if ($this->_context != 'MSGID' && $this->_context != 'MSGCTXT' && $this->_context != 'MSGID_PLURAL' && $this->_context != 'MSGSTR_ARR') {
          // Plural message strings must come after msgid, msgctxt,
          // msgid_plural, or other msgstr[] entries.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Ensure the plurality is terminated.
        if (strpos($line, ']') === FALSE) {
          $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }

        // Extract the plurality: whatever stands between '[' and ']'.
        $frombracket = strstr($line, '[');
        $this->_current_plural_index = substr($frombracket, 1, strpos($frombracket, ']') - 1);

        // Skip to the next whitespace and trim away any further whitespace,
        // bringing $line to the message text only.
        $line = trim(strstr($line, " "));
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }
        if (!isset($this->_current_item['msgstr']) || !is_array($this->_current_item['msgstr'])) {
          $this->_current_item['msgstr'] = array();
        }
        $this->_current_item['msgstr'][$this->_current_plural_index] = $quoted;
        $this->_context = 'MSGSTR_ARR';
        return;
      }
      elseif (!strncmp("msgstr", $line, 6)) {
        // A string pair for an msgid (with optional context).
        if ($this->_context != 'MSGID' && $this->_context != 'MSGCTXT') {
          // Strings are only valid within an id or context scope.
          $this->_errors[] = $t('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgstr' and trim away whitespaces.
        $line = trim(substr($line, 6));

        // Only the msgstr string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->_errors[] = $t('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
          return FALSE;
        }
        $this->_current_item['msgstr'] = $quoted;
        $this->_context = 'MSGSTR';
        return;
      }
      elseif ($line != '') {
        // Anything that is not a token may be a continuation of a previous
        // token.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // This string must be quoted.
          $this->_errors[] = $t('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
          return FALSE;
        }

        // Append the string to the current item.
        if ($this->_context == 'MSGID' || $this->_context == 'MSGID_PLURAL') {
          if (is_array($this->_current_item['msgid'])) {
            // Add string to last array element for plural sources.
            $last_index = count($this->_current_item['msgid']) - 1;
            $this->_current_item['msgid'][$last_index] .= $quoted;
          }
          else {
            // Singular source, just append the string.
            $this->_current_item['msgid'] .= $quoted;
          }
        }
        elseif ($this->_context == 'MSGCTXT') {
          // Multiline context name.
          $this->_current_item['msgctxt'] .= $quoted;
        }
        elseif ($this->_context == 'MSGSTR') {
          // Multiline translation string.
          $this->_current_item['msgstr'] .= $quoted;
        }
        elseif ($this->_context == 'MSGSTR_ARR') {
          // Multiline plural translation string.
          $this->_current_item['msgstr'][$this->_current_plural_index] .= $quoted;
        }
        else {
          // No valid context to append to.
          $this->_errors[] = $t('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
    }

    // Empty line read or EOF of PO stream, close out the last entry.
    if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
      $this->setItemFromArray($this->_current_item);
      $this->_current_item = array();
    }
    elseif ($this->_context != 'COMMENT') {
      $this->_errors[] = $t('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
      return FALSE;
    }
  }

  /**
   * Stores the parsed values as a PoItem object.
   *
   * The resulting object becomes the last read item and the parser context is
   * reset to 'COMMENT', ready for the next entry.
   *
   * @param array $value
   *   Raw data of one entry. 'msgid' and 'msgstr' are read unconditionally;
   *   'msgctxt' and '#' (list of comment lines) are optional. An array in
   *   'msgstr' marks the entry as plural.
   */
  public function setItemFromArray($value) {
    // Collapse the comment lines, if there are any, into a single string.
    $comments = isset($value['#']) ? $this->shortenComments($value['#']) : '';

    // An array of translations means plural variants; order them by their
    // form index.
    $plural = is_array($value['msgstr']);
    if ($plural) {
      ksort($value['msgstr']);
    }

    $item = new PoItem();
    $item->setContext(isset($value['msgctxt']) ? $value['msgctxt'] : '');
    $item->setSource($value['msgid']);
    $item->setTranslation($value['msgstr']);
    $item->setPlural($plural);
    $item->setComment($comments);
    $item->setLangcode($this->_langcode);

    $this->_last_item = $item;
    $this->_context = 'COMMENT';
  }

  /**
   * Parses a string in quotes.
   *
   * The string must start and end with the same quote character and must
   * therefore be at least two characters long; a lone quote character is not
   * a quoted string.
   *
   * @param $string
   *   A string specified with enclosing quotes.
   *
   * @return
   *   The string parsed from inside the quotes: with C-style escape sequences
   *   resolved for double quotes, verbatim for single quotes. FALSE if the
   *   string is not properly quoted.
   */
  function parseQuoted($string) {
    // An opening and a closing quote are the minimum. Without this check a
    // single quote character would count as both and be accepted as an empty
    // string.
    if (!is_string($string) || strlen($string) < 2) {
      return FALSE;
    }

    $quote = $string[0];
    if ($quote !== substr($string, -1)) {
      // Start and end quotes must be the same.
      return FALSE;
    }

    // Older PHP versions return FALSE instead of '' for an empty result, which
    // callers could not tell apart from a parse error.
    $inner = (string) substr($string, 1, -1);
    if ($quote === '"') {
      // Double quotes: strip slashes.
      return stripcslashes($inner);
    }
    if ($quote === "'") {
      // Simple quote: return as-is.
      return $inner;
    }

    // Unrecognized quote.
    return FALSE;
  }

  /**
   * Generates a short, one-string version of the passed comment array.
   *
   * Each comment line loses its first character and the lines are joined
   * with ', '. No further lines are added once the collected string has
   * reached 130 characters; the line that crosses this mark is still kept in
   * full.
   *
   * @param $comment
   *   An array of strings containing a comment.
   *
   * @return
   *   Short one-string version of the comment.
   */
  private function shortenComments($comment) {
    $short = '';
    foreach ($comment as $comment_line) {
      // The limit is checked against what was collected so far, before the
      // next line is appended.
      if (strlen($short) >= 130) {
        break;
      }
      $short .= substr($comment_line, 1) . ', ';
    }

    // Drop the trailing ', ' separator and surrounding whitespace.
    return trim(substr($short, 0, -2));
  }

}

Classes

Name — Description
PoStreamReader Implements Gettext PO stream reader.