function drupal_html_to_text

Transforms an HTML string into plain text, preserving its structure.

The output will be suitable for use as 'format=flowed; delsp=yes' text (RFC 3676) and can be passed directly to drupal_mail() for sending.

We deliberately use LF rather than CRLF, see drupal_mail().

This function provides suitable alternatives for the following tags: <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt> <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>


$string: The string to be transformed.

$allowed_tags (optional): If supplied, a list of tags that will be transformed. If omitted, all supported tags are transformed.

Return value

The transformed string.

drupal/includes/mail.inc, line 402
API functions for processing and sending e-mail.


/**
 * Transforms an HTML string into plain text, preserving its structure.
 *
 * The output will be suitable for use as 'format=flowed; delsp=yes' text
 * (RFC 3676) and can be passed directly to drupal_mail() for sending.
 *
 * We deliberately use LF rather than CRLF, see drupal_mail().
 *
 * This function provides suitable alternatives for the following tags:
 * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
 * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
 *
 * @param $string
 *   The string to be transformed.
 * @param $allowed_tags
 *   (optional) If supplied, a list of tags that will be transformed. If
 *   omitted, all supported tags are transformed.
 *
 * @return
 *   The transformed string.
 */
function drupal_html_to_text($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  static $supported_tags;
  if (empty($supported_tags)) {
    $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags;

  // Make sure tags, entities and attributes are well-formed and properly nested.
  $string = _filter_htmlcorrector(filter_xss($string, $allowed_tags));

  // Apply inline styles.
  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="http://drupal.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  // The first call resets the URL collector, the callback fills it while
  // rewriting the links, and the argument-less call reads the list back.
  _drupal_html_to_mail_urls(NULL, TRUE);
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  $string = preg_replace_callback($pattern, '_drupal_html_to_mail_urls', $string);
  $urls = _drupal_html_to_mail_urls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting empty strings as required).

  // Odd/even counter (tag or no tag).
  $tag = FALSE;
  // Case conversion function.
  $casing = NULL;
  $output = '';
  // All current indentation string chunks.
  $indent = array();
  // Array of counters for opened lists.
  $lists = array();
  foreach ($split as $value) {
    // Holds a string ready to be formatted and output.
    $chunk = NULL;

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      switch ($tagname) {
        // List counters.
        case 'ul':
          array_unshift($lists, '*');
          break;

        case 'ol':
          array_unshift($lists, 1);
          break;

        case '/ul':
        case '/ol':
          // Leaving a list: drop its counter so that an enclosing list
          // resumes with its own marker/numbering.
          array_shift($lists);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Quotation/list markers, non-fancy headers.
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;

        case 'li':
          $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
          break;

        case 'dd':
          $indent[] = '    ';
          break;

        case 'h3':
          $indent[] = '.... ';
          break;

        case 'h4':
          $indent[] = '.. ';
          break;

        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") . "\"\n";
            // Ensure blank new-line.
            $chunk = '';
          }
          // Fall-through: the quote's indentation chunk is removed below.

        case '/li':
        case '/dd':
          // Remove the indentation chunk pushed by the opening tag.
          array_pop($indent);
          break;

        case '/h3':
        case '/h4':
          // Only h3/h4 pushed an indentation chunk; h5/h6 did not.
          array_pop($indent);
          // Fall-through.

        case '/h5':
        case '/h6':
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Fancy headers.
        case 'h1':
          $indent[] = '======== ';
          $casing = 'drupal_strtoupper';
          break;

        case 'h2':
          $indent[] = '-------- ';
          $casing = 'drupal_strtoupper';
          break;

        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = _drupal_html_to_text_pad($output, $tagname == '/h1' ? '=' : '-', ' ');
          array_pop($indent);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Horizontal rulers.
        case 'hr':
          // Insert immediately.
          $output .= drupal_wrap_mail('', implode('', $indent)) . "\n";
          $output = _drupal_html_to_text_pad($output, '-');
          break;

        // Paragraphs and definition lists.
        case '/p':
        case '/dl':
          // Ensure blank new-line.
          $chunk = '';
          break;
      }
    }
    // Process blocks of text.
    else {
      // Convert inline HTML text to plain text; not removing line-breaks or
      // white-space, since that breaks newlines when sanitizing plain-text.
      $value = trim(decode_entities($value));
      if (drupal_strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output. An empty string counts:
    // it is how the tag handlers above request a blank line.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = $casing($chunk);
      }
      // Format it and apply the current indentation.
      $output .= drupal_wrap_mail($chunk, implode('', $indent)) . MAIL_LINE_ENDINGS;
      // Remove non-quotation markers from indentation.
      $indent = array_map('_drupal_html_to_text_clean', $indent);
    }

    // Tags and literals alternate in $split, so flip on every element.
    $tag = !$tag;
  }

  return $output . $footnotes;
}