/**
 * Attempt to guess the document format from some content.
 *
 * Detection proceeds in three stages:
 *  1. If $data is an array it is treated as already-parsed RDF/PHP.
 *  2. If $filename is given and ends in ".suffix", the suffix is compared
 *     against the extensions of every registered format.
 *  3. Otherwise the first 255 bytes of $data are matched against a series
 *     of format-specific patterns, in a fixed order.
 *
 * If the document format is not recognised, null is returned.
 *
 * @param  string|array $data     The document data
 * @param  string       $filename Optional filename
 * @return object|null  The format object (EasyRdf_Format), or null if unknown
 */
public static function guessFormat($data, $filename = null) {
    if (is_array($data)) {
        // Data has already been parsed into RDF/PHP
        return self::getFormat('php');
    }

    // First try and identify by the filename suffix
    if ($filename && preg_match('/\.(\w+)$/', $filename, $matches)) {
        $suffix = $matches[1];
        foreach (self::$formats as $format) {
            // Strict comparison: the captured suffix is always a string, and
            // this avoids PHP's numeric-string juggling (e.g. '1e1' == '10').
            if (in_array($suffix, $format->extensions, true)) {
                return $format;
            }
        }
    }

    // Then try and guess by the first 255 bytes of content.
    // Every check returns, so the order below is the detection priority.
    $short = substr($data, 0, 255);

    // A leading "{" (after optional whitespace) looks like a JSON object
    if (preg_match('/^\s*\{/', $short)) {
        return self::getFormat('json');
    }

    // Any element or attribute in the "rdf:" namespace prefix
    if (preg_match('/<rdf:/i', $short)) {
        return self::getFormat('rdfxml');
    }

    // Turtle directives
    if (preg_match('/@prefix\s|@base\s/', $short)) {
        return self::getFormat('turtle');
    }

    // A line starting with two angle-bracketed terms separated by a space.
    // NOTE(review): this pattern is loose; one-line markup such as
    // "<html> <head>" also matches it and is therefore reported as
    // N-Triples before the HTML check below is reached - confirm whether
    // the ordering should change.
    if (preg_match('/^\s*<.+> <.+>/m', $short)) {
        return self::getFormat('ntriples');
    }

    // SPARQL Query Results XML namespace. The dots are escaped so that they
    // only match a literal "." rather than any character.
    if (preg_match('|http://www\.w3\.org/2005/sparql-results|', $short)) {
        return self::getFormat('sparql-xml');
    }

    // The word "RDFa" (any case) delimited by non-word characters
    if (preg_match('/\WRDFa\W/i', $short)) {
        return self::getFormat('rdfa');
    }

    if (preg_match('/<!DOCTYPE html|<html/i', $short)) {
        // We don't support any other microformats embedded in HTML
        return self::getFormat('rdfa');
    }

    return null;
}