aggregator.module

Used to aggregate syndicated content (RSS, RDF, and Atom).

File

drupal/core/modules/aggregator/aggregator.module
View source
<?php

/**
 * @file
 * Used to aggregate syndicated content (RSS, RDF, and Atom).
 */
use Drupal\aggregator\Plugin\Core\Entity\Feed;
use Drupal\Component\Plugin\Exception\PluginException;

/**
 * Denotes that a feed's items should never expire.
 *
 * aggregator_cron() also compares this value against a feed's refresh
 * interval: feeds whose interval equals it are not queued for updates.
 */
const AGGREGATOR_CLEAR_NEVER = 0;

/**
 * Implements hook_help().
 *
 * Builds the help markup for the module's help page and for the aggregator
 * administration pages. Paths that are not listed in the switch below produce
 * no return value.
 *
 * @param $path
 *   The path for which help is requested.
 * @param $arg
 *   Not used by this implementation.
 *
 * @return
 *   A string of HTML for the recognized paths.
 */
function aggregator_help($path, $arg) {
  switch ($path) {
    case 'admin/help#aggregator':
      $output = '';
      $output .= '<h3>' . t('About') . '</h3>';
      // Only the placeholder that actually appears in the string is passed.
      $output .= '<p>' . t('The Aggregator module is an on-site syndicator and news reader that gathers and displays fresh content from RSS-, RDF-, and Atom-based feeds made available across the web. Thousands of sites (particularly news sites and blogs) publish their latest headlines in feeds, using a number of standardized XML-based formats. For more information, see the online handbook entry for <a href="@aggregator-module">Aggregator module</a>.', array(
        '@aggregator-module' => 'http://drupal.org/documentation/modules/aggregator',
      )) . '</p>';
      $output .= '<h3>' . t('Uses') . '</h3>';
      $output .= '<dl>';
      $output .= '<dt>' . t('Viewing feeds') . '</dt>';
      // The translated string closes its own links, so only the definition
      // element is closed here.
      $output .= '<dd>' . t('Feeds contain published content, and may be grouped in categories, generally by topic. Users view feed content in the <a href="@aggregator">main aggregator display</a>, or by <a href="@aggregator-sources">their source</a> (usually via an RSS feed reader). The most recent content in a feed or category can be displayed as a block through the <a href="@admin-block">Blocks administration page</a>.', array(
        '@aggregator' => url('aggregator'),
        '@aggregator-sources' => url('aggregator/sources'),
        '@admin-block' => url('admin/structure/block'),
      )) . '</dd>';
      $output .= '<dt>' . t('Adding, editing, and deleting feeds') . '</dt>';
      $output .= '<dd>' . t('Administrators can add, edit, and delete feeds, and choose how often to check each feed for newly updated items on the <a href="@feededit">Feed aggregator administration page</a>.', array(
        '@feededit' => url('admin/config/services/aggregator'),
      )) . '</dd>';
      $output .= '<dt>' . t('OPML integration') . '</dt>';
      $output .= '<dd>' . t('A <a href="@aggregator-opml">machine-readable OPML file</a> of all feeds is available. OPML is an XML-based file format used to share outline-structured information such as a list of RSS feeds. Feeds can also be <a href="@import-opml">imported via an OPML file</a>.', array(
        '@aggregator-opml' => url('aggregator/opml'),
        '@import-opml' => url('admin/config/services/aggregator'),
      )) . '</dd>';
      $output .= '<dt>' . t('Configuring cron') . '</dt>';
      $output .= '<dd>' . t('A correctly configured <a href="@cron">cron maintenance task</a> is required to update feeds automatically.', array(
        '@cron' => 'http://drupal.org/cron',
      )) . '</dd>';
      $output .= '</dl>';
      return $output;

    case 'admin/config/services/aggregator':
      $output = '<p>' . t('Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', array(
        '@rss' => 'http://cyber.law.harvard.edu/rss/',
        '@rdf' => 'http://www.w3.org/RDF/',
        '@atom' => 'http://www.atomenabled.org',
      )) . '</p>';
      $output .= '<p>' . t('Current feeds are listed below, and <a href="@addfeed">new feeds may be added</a>. For each feed or feed category, the <em>latest items</em> block may be enabled at the <a href="@block">blocks administration page</a>.', array(
        '@addfeed' => url('admin/config/services/aggregator/add/feed'),
        '@block' => url('admin/structure/block'),
      )) . '</p>';
      return $output;

    case 'admin/config/services/aggregator/add/feed':
      return '<p>' . t('Add a feed in RSS, RDF or Atom format. A feed may only have one entry.') . '</p>';

    case 'admin/config/services/aggregator/add/category':
      return '<p>' . t('Categories allow feed items from different feeds to be grouped together. For example, several sport-related feeds may belong to a category named <em>Sports</em>. Feed items may be grouped automatically (by selecting a category when creating or editing a feed) or manually (via the <em>Categorize</em> page available from feed item listings). Each category provides its own feed page and block.') . '</p>';

    case 'admin/config/services/aggregator/add/opml':
      return '<p>' . t('<acronym title="Outline Processor Markup Language">OPML</acronym> is an XML format used to exchange multiple feeds between aggregators. A single OPML document may contain a collection of many feeds. Drupal can parse such a file and import all feeds at once, saving you the effort of adding them manually. You may either upload a local file from your computer or enter a URL where Drupal can download it.') . '</p>';
  }
}

/**
 * Implements hook_theme().
 *
 * Declares the module's theme hooks together with their default variables
 * and, where set, their include file and template name.
 */
function aggregator_theme() {
  // Include file referenced by most of the hooks below.
  $pages_include = 'aggregator.pages.inc';

  $hooks = array();

  $hooks['aggregator_feed_source'] = array(
    'variables' => array('aggregator_feed' => NULL, 'view_mode' => NULL),
    'file' => $pages_include,
    'template' => 'aggregator-feed-source',
  );

  // The only hook without an include file or template.
  $hooks['aggregator_block_item'] = array(
    'variables' => array('item' => NULL, 'feed' => 0),
  );

  $hooks['aggregator_summary_items'] = array(
    'variables' => array('summary_items' => NULL, 'source' => NULL),
    'file' => $pages_include,
    'template' => 'aggregator-summary-items',
  );

  $hooks['aggregator_summary_item'] = array(
    'variables' => array('aggregator_item' => NULL, 'view_mode' => NULL),
    'file' => $pages_include,
  );

  $hooks['aggregator_item'] = array(
    'variables' => array('aggregator_item' => NULL, 'view_mode' => NULL),
    'file' => $pages_include,
    'template' => 'aggregator-item',
  );

  $hooks['aggregator_page_opml'] = array(
    'variables' => array('feeds' => NULL),
    'file' => $pages_include,
  );

  $hooks['aggregator_page_rss'] = array(
    'variables' => array('feeds' => NULL, 'category' => NULL),
    'file' => $pages_include,
  );

  return $hooks;
}

/**
 * Implements hook_menu().
 *
 * Declares the administrative paths under admin/config/services/aggregator
 * and the public paths under aggregator/. Some items only name a route, the
 * others carry a page callback with access arguments or an access callback.
 */
function aggregator_menu() {
  // Path prefixes shared by several items.
  $admin_root = 'admin/config/services/aggregator';
  $category_root = 'aggregator/categories/%aggregator_category';
  $source_root = 'aggregator/sources/%aggregator_feed';

  // Access argument lists and include files that are repeated below.
  $admin_only = array('administer news feeds');
  $viewers = array('access news feeds');
  $admin_include = 'aggregator.admin.inc';
  $pages_include = 'aggregator.pages.inc';

  $items = array();

  // Administration pages.
  $items[$admin_root] = array(
    'title' => 'Feed aggregator',
    'description' => "Configure which content your site aggregates from other sites, how often it polls them, and how they're categorized.",
    'route_name' => 'aggregator_admin_overview',
    'weight' => 10,
  );
  $items[$admin_root . '/add/feed'] = array(
    'title' => 'Add feed',
    'route_name' => 'aggregator_feed_add',
    'type' => MENU_LOCAL_ACTION,
  );
  $items[$admin_root . '/add/category'] = array(
    'title' => 'Add category',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category'),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_ACTION,
    'file' => $admin_include,
  );
  $items[$admin_root . '/add/opml'] = array(
    'title' => 'Import OPML',
    'type' => MENU_LOCAL_ACTION,
    'route_name' => 'aggregator_opml_add',
  );
  $items[$admin_root . '/remove/%aggregator_feed'] = array(
    'title' => 'Remove items',
    'route_name' => 'aggregator_feed_items_delete',
  );
  $items[$admin_root . '/update/%aggregator_feed'] = array(
    'title' => 'Update items',
    'route_name' => 'aggregator_feed_refresh',
  );
  $items[$admin_root . '/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[$admin_root . '/settings'] = array(
    'title' => 'Settings',
    'description' => 'Configure the behavior of the feed aggregator, including when to discard feed items and how to present feed items and categories.',
    'route_name' => 'aggregator_admin_settings',
    'type' => MENU_LOCAL_TASK,
    'weight' => 100,
  );

  // Public listing pages and machine-readable outputs.
  $items['aggregator'] = array(
    'title' => 'Feed aggregator',
    'weight' => 5,
    'route_name' => 'aggregator_page_last',
  );
  $items['aggregator/sources'] = array(
    'title' => 'Sources',
    'route_name' => 'aggregator_sources',
  );
  $items['aggregator/categories'] = array(
    'title' => 'Categories',
    'page callback' => 'aggregator_page_categories',
    'access callback' => '_aggregator_has_categories',
    'file' => $pages_include,
  );
  $items['aggregator/rss'] = array(
    'title' => 'RSS feed',
    'page callback' => 'aggregator_page_rss',
    'access arguments' => $viewers,
    'type' => MENU_CALLBACK,
    'file' => $pages_include,
  );
  $items['aggregator/opml'] = array(
    'title' => 'OPML feed',
    'page callback' => 'aggregator_page_opml',
    'access arguments' => $viewers,
    'type' => MENU_CALLBACK,
    'file' => $pages_include,
  );

  // Per-category pages; the loaded category is path argument 2.
  $items[$category_root] = array(
    'title callback' => '_aggregator_category_title',
    'title arguments' => array(2),
    'page callback' => 'aggregator_page_category',
    'page arguments' => array(2),
    'access arguments' => $viewers,
    'file' => $pages_include,
  );
  $items[$category_root . '/view'] = array(
    'title' => 'View',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[$category_root . '/categorize'] = array(
    'title' => 'Categorize',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_page_category_form', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'file' => $pages_include,
  );
  $items[$category_root . '/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 10,
    'file' => $admin_include,
  );

  // Per-feed pages; the loaded feed is path argument 2.
  $items[$source_root] = array(
    'title callback' => 'entity_page_label',
    'title arguments' => array(2),
    'page callback' => 'aggregator_page_source',
    'page arguments' => array(2),
    'access arguments' => $viewers,
    'file' => $pages_include,
  );
  $items[$source_root . '/view'] = array(
    'title' => 'View',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[$source_root . '/categorize'] = array(
    'title' => 'Categorize',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_page_source_form', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'file' => $pages_include,
  );
  $items[$source_root . '/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'entity_get_form',
    'page arguments' => array(2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 10,
    'file' => $admin_include,
  );

  // Administrative edit and delete pages; the loaded object is path
  // argument 6.
  $items[$admin_root . '/edit/feed/%aggregator_feed'] = array(
    'title' => 'Edit feed',
    'page callback' => 'entity_get_form',
    'page arguments' => array(6),
    'access arguments' => $admin_only,
    'file' => $admin_include,
  );
  $items[$admin_root . '/delete/feed/%aggregator_feed'] = array(
    'title' => 'Delete feed',
    'route_name' => 'aggregator_feed_delete',
  );
  $items[$admin_root . '/edit/category/%aggregator_category'] = array(
    'title' => 'Edit category',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category', 6),
    'access arguments' => $admin_only,
    'file' => $admin_include,
  );

  return $items;
}

/**
 * Title callback: Supplies the page title for an aggregator category.
 *
 * @param $category
 *   The aggregator category object whose title is wanted.
 *
 * @return
 *   The value of the category's title property.
 */
function _aggregator_category_title($category) {
  $title = $category->title;
  return $title;
}

/**
 * Access callback: Checks that categories exist and may be viewed.
 *
 * @return
 *   TRUE when the current user has the 'access news feeds' permission and the
 *   category table holds at least one row; FALSE otherwise.
 */
function _aggregator_has_categories() {
  // Skip the database lookup entirely for users without access.
  if (!user_access('access news feeds')) {
    return FALSE;
  }

  // A single-row probe is enough to know whether any category exists.
  $probe = db_query_range('SELECT 1 FROM {aggregator_category}', 0, 1);
  $found = $probe->fetchField();
  return (bool) $found;
}

/**
 * Implements hook_permission().
 *
 * Declares one permission for administering news feeds and one for viewing
 * them.
 */
function aggregator_permission() {
  $permissions = array();

  $permissions['administer news feeds'] = array(
    'title' => t('Administer news feeds'),
  );
  $permissions['access news feeds'] = array(
    'title' => t('View news feeds'),
  );

  return $permissions;
}

/**
 * Implements hook_cron().
 *
 * Puts every feed whose refresh interval has elapsed into the
 * 'aggregator_feeds' queue, then clears queue markers older than six hours.
 */
function aggregator_cron() {
  // Feeds that are not queued yet, are due for a check, and whose refresh
  // interval is not AGGREGATOR_CLEAR_NEVER.
  $due_feeds = db_query('SELECT fid FROM {aggregator_feed} WHERE queued = 0 AND checked + refresh < :time AND refresh <> :never', array(
    ':time' => REQUEST_TIME,
    ':never' => AGGREGATOR_CLEAR_NEVER,
  ));
  $refresh_queue = Drupal::queue('aggregator_feeds');

  foreach ($due_feeds->fetchCol() as $feed_id) {
    $feed = aggregator_feed_load($feed_id);
    if (!$refresh_queue->createItem($feed)) {
      // The queue did not accept the feed; leave it unmarked.
      continue;
    }

    // Stamp the feed so that it is not queued a second time.
    $feed->queued->value = REQUEST_TIME;
    $feed->save();
  }

  // A queue marker older than six hours is taken to mean the update failed,
  // so it is reset.
  $stale_before = REQUEST_TIME - 3600 * 6;
  db_update('aggregator_feed')
    ->fields(array('queued' => 0))
    ->condition('queued', $stale_before, '<')
    ->execute();
}

/**
 * Implements hook_queue_info().
 *
 * Declares the 'aggregator_feeds' queue, whose items are handled by
 * aggregator_refresh() with a cron time setting of 60.
 */
function aggregator_queue_info() {
  return array(
    'aggregator_feeds' => array(
      'title' => t('Aggregator refresh'),
      'worker callback' => 'aggregator_refresh',
      'cron' => array('time' => 60),
    ),
  );
}

/**
 * Inserts, updates or deletes an aggregator category.
 *
 * The operation is chosen from the contents of $edit:
 * - 'cid' and 'title' both non-empty: the category is updated.
 * - 'cid' non-empty, 'title' empty: the category is deleted.
 * - 'cid' empty, 'title' non-empty: a new category is inserted.
 * - both empty: nothing happens.
 *
 * @param $edit
 *   An associative array describing the category. The keys read here are
 *   'cid', 'title' and 'description'.
 */
function aggregator_save_category($edit) {
  $has_cid = !empty($edit['cid']);
  $has_title = !empty($edit['title']);

  // Without an id or a title there is nothing to insert, update or delete.
  if (!$has_cid && !$has_title) {
    return;
  }

  $link_path = 'aggregator/categories/';

  if ($has_cid && $has_title) {
    // Existing category with a title: write the new title and description.
    $link_path .= $edit['cid'];
    db_merge('aggregator_category')
      ->key(array('cid' => $edit['cid']))
      ->fields(array(
        'title' => $edit['title'],
        'description' => $edit['description'],
      ))
      ->execute();
    $op = 'update';
  }
  elseif ($has_cid) {
    // Existing category without a title: remove it.
    $link_path .= $edit['cid'];
    db_delete('aggregator_category')
      ->condition('cid', $edit['cid'])
      ->execute();

    // Delete any block entities that use this category's block plugin.
    if (module_exists('block')) {
      $category_blocks = entity_load_multiple_by_properties('block', array(
        'plugin' => 'aggregator_category_block:' . $edit['cid'],
      ));
      foreach ($category_blocks as $category_block) {
        $category_block->delete();
      }
    }
    $edit['title'] = '';
    $op = 'delete';
  }
  else {
    // New category: the value returned by the insert completes the link path.
    $link_path .= db_insert('aggregator_category')
      ->fields(array(
        'title' => $edit['title'],
        'description' => $edit['description'],
        'block' => 5,
      ))
      ->execute();
    $op = 'insert';
  }

  // Pass the change on to the menu link system when it is available.
  if (module_exists('menu_link')) {
    menu_link_maintain('aggregator', $op, $link_path, $edit['title']);
  }
}

/**
 * Removes all items from a feed and resets its fetch state.
 *
 * Every processor plugin known to the processor manager has its remove()
 * method called with the feed; afterwards the feed's checked, hash, etag and
 * modified values are cleared and the feed is saved.
 *
 * @param \Drupal\aggregator\Plugin\Core\Entity\Feed $feed
 *   An object describing the feed to be cleared.
 */
function aggregator_remove(Feed $feed) {
  $processors = Drupal::service('plugin.manager.aggregator.processor');

  // Only the plugin ids are needed; the definitions themselves are not used.
  foreach ($processors->getDefinitions() as $plugin_id => $unused_definition) {
    $processor = $processors->createInstance($plugin_id);
    $processor->remove($feed);
  }

  // Clear the stored fetch state.
  $feed->checked->value = 0;
  $feed->hash->value = '';
  $feed->etag->value = '';
  $feed->modified->value = 0;
  $feed->save();
}

/**
 * Checks a news feed for new items.
 *
 * Fetches the feed with the configured fetcher plugin. When the fetch
 * succeeded and the SHA-256 hash of the fetched data differs from the stored
 * hash, the feed is parsed with the configured parser plugin, saved, and, if
 * it has items, handed to each instantiated processor plugin. In every case
 * the feed is then marked as checked, its queued marker is cleared, it is
 * saved, and postProcess() is called on each instantiated processor.
 *
 * Plugin exceptions are logged through watchdog_exception() and are not
 * rethrown.
 *
 * @param \Drupal\aggregator\Plugin\Core\Entity\Feed $feed
 *   An object describing the feed to be refreshed.
 */
function aggregator_refresh(Feed $feed) {

  // Store feed URL to track changes.
  $feed_url = $feed->url->value;
  $config = config('aggregator.settings');

  // Fetch the feed.
  $fetcher_manager = Drupal::service('plugin.manager.aggregator.fetcher');
  try {
    $success = $fetcher_manager
      ->createInstance($config
      ->get('fetcher'))
      ->fetch($feed);
  } catch (PluginException $e) {
    // A fetcher plugin failure is logged and treated as an unsuccessful fetch.
    $success = FALSE;
    watchdog_exception('aggregator', $e);
  }

  // Retrieve processor manager now.
  $processor_manager = Drupal::service('plugin.manager.aggregator.processor');

  // Store instances in an array so we don't have to instantiate new objects.
  $processor_instances = array();
  foreach ($config
    ->get('processors') as $processor) {
    try {
      $processor_instances[$processor] = $processor_manager
        ->createInstance($processor);
    } catch (PluginException $e) {
      // A processor that cannot be instantiated is logged and left out.
      watchdog_exception('aggregator', $e);
    }
  }

  // We store the hash of feed data in the database. When refreshing a
  // feed we compare stored hash and new hash calculated from downloaded
  // data. If both are equal we say that feed is not updated.
  // The hash is computed even when the fetch failed; it is only consulted
  // when $success is truthy.
  $hash = hash('sha256', $feed->source_string);
  if ($success && $feed->hash->value != $hash) {

    // Parse the feed.
    $parser_manager = Drupal::service('plugin.manager.aggregator.parser');
    try {
      if ($parser_manager
        ->createInstance($config
        ->get('parser'))
        ->parse($feed)) {
        // Fall back to the feed URL when parsing left the link empty.
        if (empty($feed->link->value)) {
          $feed->link->value = $feed->url->value;
        }
        $feed->hash->value = $hash;

        // Update feed with parsed data.
        $feed
          ->save();

        // Log if feed URL has changed.
        if ($feed->url->value != $feed_url) {
          watchdog('aggregator', 'Updated URL for feed %title to %url.', array(
            '%title' => $feed
              ->label(),
            '%url' => $feed->url->value,
          ));
        }
        watchdog('aggregator', 'There is new syndicated content from %site.', array(
          '%site' => $feed
            ->label(),
        ));
        drupal_set_message(t('There is new syndicated content from %site.', array(
          '%site' => $feed
            ->label(),
        )));

        // If there are items on the feed, let enabled processors process them.
        if (!empty($feed->items)) {
          foreach ($processor_instances as $instance) {
            $instance
              ->process($feed);
          }
        }
      }
    } catch (PluginException $e) {
      // Covers plugin exceptions thrown anywhere inside the try block above.
      watchdog_exception('aggregator', $e);
    }
  }
  else {
    // Reached both when the fetch failed and when the hash is unchanged.
    drupal_set_message(t('There is no new syndicated content from %site.', array(
      '%site' => $feed
        ->label(),
    )));
  }

  // Regardless of successful or not, indicate that this feed has been checked.
  $feed->checked->value = REQUEST_TIME;
  $feed->queued->value = 0;
  $feed
    ->save();

  // Processing is done, call postProcess on enabled processors.
  foreach ($processor_instances as $instance) {
    $instance
      ->postProcess($feed);
  }
}

/**
 * Loads a single aggregator feed entity by its id.
 *
 * @param int $fid
 *   The feed id.
 *
 * @return \Drupal\aggregator\Plugin\Core\Entity\Feed
 *   Whatever entity_load() returns for the 'aggregator_feed' entity type and
 *   the given id.
 */
function aggregator_feed_load($fid) {
  $feed = entity_load('aggregator_feed', $fid);
  return $feed;
}

/**
 * Loads an aggregator category, statically cached per category id.
 *
 * @param $cid
 *   The category id.
 *
 * @return
 *   The category row as returned by fetchObject(), i.e. an object rather than
 *   an array. NOTE(review): presumably FALSE when no row matches; confirm
 *   against the database layer.
 */
function aggregator_category_load($cid) {
  $loaded =& drupal_static(__FUNCTION__);

  // Serve repeat requests for the same id from the static cache.
  if (isset($loaded[$cid])) {
    return $loaded[$cid];
  }

  $statement = db_query('SELECT * FROM {aggregator_category} WHERE cid = :cid', array(
    ':cid' => $cid,
  ));
  $loaded[$cid] = $statement->fetchObject();

  return $loaded[$cid];
}

/**
 * Returns HTML for one feed item as shown in the block.
 *
 * The output is a single link to the item, followed by a newline.
 *
 * @param $variables
 *   An associative array containing:
 *   - item: The item to be displayed; its link and title properties are read.
 *   - feed: Not used.
 *
 * @ingroup themeable
 */
function theme_aggregator_block_item($variables) {
  $item = $variables['item'];

  // Both values are passed through their sanitizing function before output.
  $href = check_url($item->link);
  $label = check_plain($item->title);

  return '<a href="' . $href . '">' . $label . "</a>\n";
}

/**
 * Filters HTML content down to the tags allowed by the module settings.
 *
 * The 'items.allowed_html' setting is split on whitespace and angle brackets
 * to obtain the list of tag names that is handed to filter_xss().
 *
 * @param $value
 *   The content to be filtered.
 *
 * @return
 *   The filtered content, as returned by filter_xss().
 */
function aggregator_filter_xss($value) {
  $allowed_html = config('aggregator.settings')
    ->get('items.allowed_html');

  // Turn a setting such as "<a> <b>" into a list of bare tag names.
  $allowed_tags = preg_split('/\\s+|<|>/', $allowed_html, -1, PREG_SPLIT_NO_EMPTY);

  return filter_xss($value, $allowed_tags);
}

/**
 * Implements hook_preprocess_HOOK() for block.html.twig.
 *
 * Sets the 'complementary' role attribute on blocks whose configuration names
 * 'aggregator' as their module; other blocks are left untouched.
 */
function aggregator_preprocess_block(&$variables) {
  $provider = $variables['configuration']['module'];
  if ($provider != 'aggregator') {
    return;
  }

  $variables['attributes']['role'] = 'complementary';
}

Functions

Name | Description
_aggregator_has_categories Access callback: Determines whether there are any aggregator categories.
_aggregator_category_title Title callback: Returns a title for aggregator category pages.
theme_aggregator_block_item Returns HTML for an individual feed item for display in the block.
aggregator_theme Implements hook_theme().
aggregator_save_category Adds/edits/deletes aggregator categories.
aggregator_remove Removes all items from a feed.
aggregator_refresh Checks a news feed for new items.
aggregator_queue_info Implements hook_queue_info().
aggregator_preprocess_block Implements hook_preprocess_HOOK() for block.html.twig.
aggregator_permission Implements hook_permission().
aggregator_menu Implements hook_menu().
aggregator_help Implements hook_help().
aggregator_filter_xss Renders the HTML content safely, as allowed.
aggregator_feed_load Loads an aggregator feed.
aggregator_cron Implements hook_cron().
aggregator_category_load Loads an aggregator category.

Constants

Name | Description
AGGREGATOR_CLEAR_NEVER Denotes that a feed's items should never expire.