<?php
// Validate xml via dtd ... RJM Programming September, 2013 eg. http://localhost:8888/xmldtdvalidate.php?inxml=http://www.rjmprogramming.com.au/Java/Eclipse/XMLValidator/Invoice.xml_GETME
// Big thanks to http://stackoverflow.com/questions/101935/validate-xml-using-a-custom-dtd-in-php
// Big thanks to http://php.net/manual/en/function.libxml-get-errors.php
// The error line numbers have to be halved because the lines are read with file(), which keeps each line's trailing newline, and are then re-joined with "\n" before parsing, so every source line occupies two lines in the text that libxml sees.
/**
 * Split text into lines exactly the way file() would after the text had been
 * written to disk: every element keeps its trailing "\n" and an empty string
 * yields an empty array.
 *
 * @param string $text
 * @return array list of lines, newline terminators retained
 */
function xmldtdvalidate_split_lines($text) {
    if ($text === "") {
        return array();
    }
    // Zero-width split after every "\n"; NO_EMPTY drops the empty tail after a final newline.
    $pieces = preg_split('/(?<=\n)/', $text, -1, PREG_SPLIT_NO_EMPTY);
    return ($pieces === false) ? array($text) : $pieces;
}

/**
 * Read an XML or DTD "source" that may be a local path, an http(s)/file URL,
 * or the literal document text itself.
 *
 * The checks run in this order: existing local path, then URL prefix, then
 * literal text.
 *
 * NOTE(review): this reads whatever path or URL it is given. If $source comes
 * from an untrusted request that allows reading arbitrary local files and
 * making arbitrary outbound requests; restrict it at the call site if this
 * script is ever exposed beyond a trusted host.
 *
 * @param string $source path, URL or literal text
 * @return array ['kind' => 'file'|'url'|'inline', 'lines' => array, 'error' => string]
 *               'error' is "" on success, otherwise a message with a leading space
 */
function xmldtdvalidate_read_source($source) {
    if (file_exists($source)) {
        // file() returns false (it does not throw) when the path cannot be read.
        $lines = is_readable($source) ? file($source) : false;
        if ($lines === false) {
            $lines = array();
        }
        $error = (count($lines) == 0) ? " access to " . $source . " denied" : "";
        return array('kind' => 'file', 'lines' => $lines, 'error' => $error);
    }

    $isurl = strncmp($source, "http:", 5) === 0
          || strncmp($source, "https:", 6) === 0
          || strncmp($source, "file:", 5) === 0;
    if ($isurl) {
        // file_get_contents() signals failure by returning false, not by throwing.
        $text = file_get_contents($source);
        if ($text === false) {
            return array('kind' => 'url', 'lines' => array(), 'error' => " unable to read " . $source);
        }
        return array('kind' => 'url', 'lines' => xmldtdvalidate_split_lines($text), 'error' => "");
    }

    // Neither an existing file nor a URL: treat the argument as the document text.
    return array('kind' => 'inline', 'lines' => xmldtdvalidate_split_lines((string) $source), 'error' => "");
}

/**
 * Validate an XML document against a DTD and echo an HTML report.
 *
 * When a DTD is in play its text is injected into the document's DOCTYPE line,
 * replacing the SYSTEM "..." reference, so validation runs against an internal
 * subset. When no DTD is supplied for a local file or for literal XML text, the
 * document is validated against whatever DOCTYPE it already carries.
 *
 * Output, echoed and not returned: "Valid<suffix>!" followed by a hidden
 * listing of the XML, or "Not valid<suffix><problems>." followed by one
 * display_xml_error() entry per libxml error and the same listing.
 *
 * Side effects: turns libxml internal error handling on and clears the libxml
 * error buffer before returning.
 *
 * @param string      $forcxml_realpath local path, http(s)/file URL, or literal XML text
 * @param string|null $forcdtd_realpath local path, http(s)/file URL, or literal DTD text;
 *                                      when omitted and the XML came from a URL, the same
 *                                      URL with ".xml" replaced by ".dtd" is used
 * @return void
 */
function xmldtdvalidate($forcxml_realpath, $forcdtd_realpath=null) {
    $suffix = "";
    $thingsfound = "";
    $forcxmlrealpath = "*huh*";   // placeholder that display_xml_error() swaps for libxml's own file name

    // ---- XML source ----
    $xmlsource = xmldtdvalidate_read_source($forcxml_realpath);
    $forcxml_lines = $xmlsource['lines'];
    $thingsfound .= $xmlsource['error'];
    if ($xmlsource['kind'] === 'file') {
        $suffix .= " for xml file " . $forcxml_realpath;
        if (count($forcxml_lines) > 0) $forcxmlrealpath = $forcxml_realpath;
    } else if ($xmlsource['kind'] === 'url') {
        $suffix .= " for xml url " . $forcxml_realpath;
        $forcxmlrealpath = $forcxml_realpath;
        // Loose comparison on purpose: both null and "" mean "no DTD given".
        if ($forcdtd_realpath == null) $forcdtd_realpath = str_replace(".xml", ".dtd", $forcxml_realpath);
    }

    // ---- DTD source ----
    $usedtd = ($forcdtd_realpath != null);
    $forcdtd_lines = array();
    if ($usedtd) {
        // Literal DTD text stays in memory; nothing is written next to the XML file.
        $dtdsource = xmldtdvalidate_read_source($forcdtd_realpath);
        $forcdtd_lines = $dtdsource['lines'];
        $thingsfound .= $dtdsource['error'];
        if ($dtdsource['kind'] === 'file') {
            $suffix .= " and dtd file " . $forcdtd_realpath;
        } else if ($dtdsource['kind'] === 'url') {
            $suffix .= " and dtd url " . $forcdtd_realpath;
        }
    }

    // ---- Build the text to parse ----
    // Lines keep their own newline and are joined with another "\n", so every
    // source line takes two lines in the parsed text; display_xml_error()
    // halves libxml's line numbers to compensate.
    $validdata = "";
    if ($usedtd) {
        $dtdsubset = " [\n" . implode("\n", $forcdtd_lines) . "\n]";
        $forcnew_lines = array();
        foreach ($forcxml_lines as $forcx) {
            if ($validdata == "") $validdata = "<a href='#' onmouseover=' document.getElementById(" . '"' . "revealp" . '"' . ").style.display = " . '"' . "block" . '"' . "; ' onclick=' document.getElementById(" . '"' . "revealp" . '"' . ").style.display = " . '"' . "block" . '"' . "; '>Show xml</a><p id='revealp' style='background-color:yellow; font-family: Courier, " . '"' . "Courier New" . '"' . ", monospace; display:none;'>";
            // Assume DOCTYPE format used:
            if (preg_match('/DOCTYPE/', $forcx)) {
                // A callback keeps "$1" or "\1" inside the DTD text literal;
                // as a plain replacement string they would be expanded.
                $forcx = preg_replace_callback('/SYSTEM "(.*)"/', function ($unused) use ($dtdsubset) {
                    return $dtdsubset;
                }, $forcx);
            }
            $forcnew_lines[] = $forcx;
            // "&" is escaped first so the entities produced for < and > are not re-escaped.
            $validdata .= str_replace(array("&", "<", ">"), array("&amp;", "&lt;", "&gt;"), $forcx) . "<br>";
        }
        $xmltext = implode("\n", $forcnew_lines);
    } else {
        $xmltext = implode("\n", $forcxml_lines);
    }

    // ---- Parse and validate ----
    // Enable user error handling
    libxml_use_internal_errors(true);
    $xmldoc = new DOMDocument;
    // loadXML() rejects an empty string outright, so only parse when something was read.
    $isvalid = ($xmltext !== "") && $xmldoc->loadXML($xmltext) && $xmldoc->validate();

    // ---- Report ----
    // The suffix, the problem list and the path all contain caller-supplied text.
    $safesuffix = htmlspecialchars($suffix, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $safethings = htmlspecialchars($thingsfound, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $safepath = htmlspecialchars($forcxmlrealpath, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $listing = ($validdata != "") ? $validdata . "</p>" : "";

    if ($isvalid) {
        echo "Valid" . $safesuffix . "!\n" . $listing;
    } else {
        echo "Not valid" . $safesuffix . $safethings . ".\n<p style='background-color:pink; font-family: Courier, " . '"' . "Courier New" . '"' . ", monospace'>";

        // Error entries quote lines from the escaped listing, split the way file() would.
        $forcxmllines = xmldtdvalidate_split_lines($validdata);
        foreach (libxml_get_errors() as $forcerror) {
            echo str_replace("\n", "<br>", display_xml_error($forcerror, $forcxmllines, $safepath));
        }
        echo "</p>" . $listing;
    }
    libxml_clear_errors();
}

/**
 * Format one libxml error as a block of text: the offending source line, a
 * caret marker, the severity/code/message, and the line, column and file.
 *
 * The caller parses text in which every source line is followed by an extra
 * newline, so libxml's line number is halved (rounding up) before it is used
 * to index $xml or shown to the user.
 *
 * NOTE(review): the column is halved as well, as the original code did. The
 * doubled newlines should not affect columns, so this may be unnecessary;
 * confirm before relying on the caret position.
 *
 * @param object $error      libxml error with line, column, level, code, message and file properties
 * @param array  $xml        source lines, indexed from 0, used to quote the failing line
 * @param string $infilepath path to report; the placeholder "*huh*" is replaced by
 *                           $error->file when libxml supplies one
 * @return string multi-line text terminated by a dashed separator line
 */
function display_xml_error($error, $xml, $infilepath)
{
    // Halve with round-up: reported lines 1,2 -> 1; 3,4 -> 2; ...
    $errorline = (int) (($error->line + ($error->line % 2)) / 2);
    // Whole columns only, so str_repeat() gets an int and no "2.5" is printed.
    $errorcolumn = max(0, (int) ($error->column / 2));

    // The reported line can fall outside $xml, e.g. line 0 or an error past the last line.
    $sourceline = isset($xml[$errorline - 1]) ? $xml[$errorline - 1] : "";

    $return  = $sourceline . "\n";
    $return .= str_repeat('-', $errorcolumn) . "^\n";

    switch ($error->level) {
        case LIBXML_ERR_WARNING:
            $label = "Warning " . $error->code . ": ";
            break;
        case LIBXML_ERR_ERROR:
            $label = "Error " . $error->code . ": ";
            break;
        case LIBXML_ERR_FATAL:
            $label = "Fatal Error " . $error->code . ": ";
            break;
        default:
            $label = "";
            break;
    }

    // Rewrite a raw line number quoted inside libxml's message to the halved
    // value. Only the label and message are rewritten, so the quoted source
    // line and the file path are left exactly as they are.
    $return .= str_replace(" " . $error->line . " ", " " . $errorline . " ", $label . trim($error->message)) .
               "\n  Line: " . $errorline .
               "\n  Column: " . $errorcolumn;

    if ($error->file) {
        $return .= ("\n  File: " . str_replace("*huh*", $error->file, $infilepath));
    } else if ($infilepath != "") {
        $return .= ("\n  File: " . $infilepath);
    }

    return $return . "\n--------------------------------------------\n";
}

// Entry point: validate the document named by ?inxml= (optionally against
// ?indtd=), or, when no usable inxml is given, show a usage hint and prompt
// for the inputs in the browser.
// Only plain string parameters are accepted: PHP turns "inxml[]=..." into an
// array, which the validator's string handling cannot process.
$inxml = (isset($_GET['inxml']) && is_string($_GET['inxml'])) ? $_GET['inxml'] : null;
$indtd = (isset($_GET['indtd']) && is_string($_GET['indtd'])) ? $_GET['indtd'] : null;

if ($inxml !== null) {
    // A null DTD is the same as omitting the argument.
    xmldtdvalidate($inxml, $indtd);
} else {
    echo "<html><body><p>Try call like ./xmldtdvalidate.php?inxml=test.xml&indtd=test.dtd</p>";
    // The prompt takes "xml[,dtd]" and splits at the first comma. Each part is
    // URL-encoded so that "&", "#", "+" and similar characters inside a path
    // or URL survive the redirect.
    echo "<s" . "cript> var xml=prompt('Enter xml[,dtd] files or urls to validate eg. test.xml,test.dtd', ''); if (xml != null) { if (xml != '') { var comma = xml.indexOf(','); var target = './xmldtdvalidate.php?inxml=' + encodeURIComponent(comma < 0 ? xml : xml.substring(0, comma)); if (comma >= 0) target += '&indtd=' + encodeURIComponent(xml.substring(comma + 1)); window.location = target; } }</s" . "cript></body></html>";
}

?>
