sagacity/exec/nvd_xml_parser.php
2018-05-07 10:51:08 -04:00

301 lines
9.5 KiB
PHP

<?php
/* XML parser for the NIST's National Vulnerability Database (NVD)
* Open Source license, NCSA/U. of Illinois
* Version 1.01, September 2, 2005
* Pascal Meunier
* Purdue University CERIAS
* Contact: pmeunier
* Uses the basic XML parser included with PHP
* that doesn't perform schema validation
* see http://us2.php.net/xml
*
* The NVD's XML schema doesn't have conflicting name tags
* at various nesting levels, so its parsing is relatively
* simple. Therefore, a flat namespace (independent of nesting level)
* works fine. This simple approach doesn't work, for example, with Microsoft's
* XML export of vulnerabilities.
* Note that this parser does not have cases for all tags in the NVD schema.
* Simply add "case" statements for the additional tags that matter to you.
*
* The data from parsing needs further validation or escaping if it is
* to be inserted into another database.
* -Replace the first line with the path to your installation of PHP
* for command line execution. Delete the path to PHP on the first line for
* execution with an Apache web server.
* -Choose a source for the XML file (see below)
*ChangeLog:
* 1.0 first version August 29, 2005
* 1.01 September 2, 2005
-Changed the name nvdcve-1999-2002.xml to nvdcve-2002.xml to match nvd changes
-Removed comment "from Apache" regarding https locations, as they work from the command line too, depending on how PHP was installed
-Now uses command line argument if present as XML source
-Now captures the content of <REF> tags
*/
error_reporting (E_ALL);
$debug = true; // will print messages about every field found if true
$count_entries = 0; // count number of new CVE entries found today
// Choose a source for parsing
// Check if a source was passed as argument
if (isset($argv[1])) {
parse_this($argv[1]);
} else {
parse_this("nvdcve-2004.xml");
// other possibilities:
//parse_this("http://nvd.nist.gov/download/nvdcve-2002.xml");
//parse_this("http://nvd.nist.gov/download/nvdcve-2003.xml");
//parse_this("http://nvd.nist.gov/download/nvdcve-2004.xml");
//parse_this("http://nvd.nist.gov/download/nvdcve-2005.xml");
//parse_this("http://nvd.nist.gov/download/nvdcve-2006.xml");
//
// The following do not work with vanilla PHP installs
// without the wrapper "https" (e.g., MacOS X default install)
//parse_this("https://nvd.nist.gov/download/nvdcve-2002.xml");
//parse_this("https://nvd.nist.gov/download/nvdcve-2003.xml");
//parse_this("https://nvd.nist.gov/download/nvdcve-2004.xml");
//parse_this("https://nvd.nist.gov/download/nvdcve-2005.xml");
//parse_this("https://nvd.nist.gov/download/nvdcve-2006.xml");
}
echo "Found $count_entries entries in this NVD location".PHP_EOL;
die; // not necessary, added for clarity
function startElement($parser, $name, $attrs) {
global $all_data, $CVE, $debug, $insert, $vendor, $product, $count_entries, $desc_type;
if ($debug) {
echo "processing tag named '$name'".PHP_EOL;
}
//
// the switch statement should enumerate all of the possible name tags.
// State is kept using global variables, so that nested tags can
// access their context.
//
switch ($name) {
case 'ENTRY':
$count_entries++;
// get name and publication date
$CVE = validate_CVE($attrs['NAME']);
$published = validate_date($attrs['PUBLISHED']);
// other attributes, such as severity, are available
// see http://nvd.nist.gov/download/nvdcve-xmldoc.cfm
if ($debug) {
echo "found CVE entry $CVE with publication date {$attrs['PUBLISHED']}".PHP_EOL;
}
break;
case 'DESCRIPT':
// if there ever was more than one type of description
// remember which source with a global
// NVD or CVE
$desc_type = $attrs['SOURCE'];
// reset data accumulator
$all_data = '';
// need to wait for end tag to get contents
break;
case 'PROD':
$vendor = $attrs['VENDOR'];
$product = $attrs['NAME'];
if ($CVE == '') {
echo "error, no CVE number";
die;
}
if ($debug) {
echo "found vendor $vendor";
echo " found product $product".PHP_EOL;
}
if ($vendor == "") {
// this happens
echo "NVD integrity alert: no vendor for product $product, CVE is $CVE".PHP_EOL;
}
if ($product == "") {
echo "NVD integrity alert: no product, CVE is $CVE".PHP_EOL;
}
break;
case 'LOSS_TYPES':
break;
case 'VULNS_TYPES':
break;
case 'REF':
// source, url, etc...
if ($debug) {
echo " reference from {$attrs['SOURCE']}";
echo " is available at {$attrs['URL']}";
echo PHP_EOL;
}
$all_data = '';
// need to wait for end tag to get contents
break;
case 'VERS':
if ($debug) {
echo " version {$attrs['NUM']}";
echo " of product $product is vulnerable".PHP_EOL;
}
break;
case '':
break;
}
}
function endElement($parser, $name) {
global $all_data, $CVE, $debug;
switch ($name) {
case 'ENTRY':
$CVE = '';
break;
case 'DESCRIPT':
if ($CVE == '') {
echo "error, no CVE number";
die;
}
if ($debug) {
echo "found description $all_data".PHP_EOL." for CVE entry $CVE".PHP_EOL;
}
// reset data accumulator
$all_data = '';
break;
case 'REF':
if ($debug) {
echo "found reference content '$all_data'".PHP_EOL." for CVE entry $CVE".PHP_EOL;
}
// reset data accumulator
$all_data = '';
break;
case 'AVAIL':
if ($debug) {
echo "loss type is availability".PHP_EOL;
}
break;
case 'CONF':
if ($debug) {
echo "loss type is confidentiality".PHP_EOL;
}
break;
case 'INT':
if ($debug) {
echo "loss type is integrity".PHP_EOL;
}
break;
case 'SEC_PROT':
if ($debug) {
echo "loss type is security protection".PHP_EOL;
}
break;
case 'ACCESS':
if ($debug) {
echo "vulnerability type is access".PHP_EOL;
}
break;
case 'INPUT':
if ($debug) {
echo "vulnerability type is input".PHP_EOL;
}
break;
case 'DESIGN':
if ($debug) {
echo "vulnerability type is design".PHP_EOL;
}
break;
case 'EXCEPTION':
if ($debug) {
echo "vulnerability type is exception".PHP_EOL;
}
break;
case 'ENV':
if ($debug) {
echo "vulnerability type is environment".PHP_EOL;
}
break;
case 'CONFIG':
if ($debug) {
echo "vulnerability type is configuration".PHP_EOL;
}
break;
case 'RACE':
if ($debug) {
echo "vulnerability type is race".PHP_EOL;
}
break;
case 'OTHER':
if ($debug) {
echo "vulnerability type is other".PHP_EOL;
}
break;
case 'REMOTE':
if ($debug) {
echo "vulnerability range is remote".PHP_EOL;
}
break;
case 'LOCAL':
if ($debug) {
echo "vulnerability range is local".PHP_EOL;
}
break;
case 'USER_INIT':
if ($debug) {
echo "vulnerability range is through a user".PHP_EOL;
}
break;
case '':
break;
}
}
function characterData($parser, $data) {
global $all_data;
// concatenate due to bug with & according to a user entry in PHP docs
// entry by sam at cwa dot co dot nz, Sept 2000
// This issue might have been fixed later. The fix can't hurt though.
$all_data .= $data;
}
function parse_this($url) {
// code for this function is from http://us2.php.net/xml
$xml_parser = xml_parser_create();
// use case-folding so we are sure to find the tag in $map_array
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
xml_set_element_handler($xml_parser, "startElement", "endElement");
xml_set_character_data_handler($xml_parser, "characterData");
if (!($fp = fopen($url, "r"))) {
die("could not open XML input");
}
while ($data = fread($fp, 4096)) {
if (!xml_parse($xml_parser, $data, feof($fp))) {
die(sprintf("XML error: %s at line %d",
xml_error_string(xml_get_error_code($xml_parser)),
xml_get_current_line_number($xml_parser)));
}
}
xml_parser_free($xml_parser);
}
function validate_CVE($input) {
// expecting something like CAN-2004-0002
// NVD documentation suggests this reg exp,
// (http://nvd.nist.gov/download/nvdcve-xmldoc.cfm)
// (CAN|CVE)-/d/d/d/d-/d/d/d/d
// which doesn't work. See below
if (preg_match('/(CAN|CVE)\-\d\d\d\d\-\d+/', $input, $matches)) {
return $matches[0];
} else {
// if there was an error (false) or if 0 matches
die ("Invalid CVE number encountered: $input");
}
}
function validate_Date($input) {
// expecting something like 2004-03-03
if (preg_match('/\d{4}\-\d{2}\-\d{2}/', $input, $matches)) {
return $matches[0];
} else {
// if there was an error (false) or if 0 matches
die ("Invalid date encountered: $input");
}
}
?>