<?php
  // tags_within.php
  // RJM Programming
  // November, 2014
  //
  // Report on tags within tag

  /**
   * Tell whether $candidate is an absolute http:// or https:// address.
   *
   * The scheme has to be at the very start of the value. Testing for "http:"
   * anywhere in the string would also accept values such as
   * "file:///etc/passwd#http:", which file_get_contents() would happily open.
   *
   * @param mixed $candidate value taken from the request
   * @return bool true only for strings that begin with http:// or https:// (any letter case)
   */
  function tags_within_is_http_url($candidate) {
    return is_string($candidate) && preg_match('#^https?://#i', $candidate) === 1;
  }

  /**
   * Read one request parameter, looking at the query string first and the
   * posted form second.
   *
   * PHP has already URL-decoded $_GET and $_POST, so the value is returned as
   * is; decoding it a second time would turn a literal "+" into a space and
   * mangle literal "%xx" sequences.
   *
   * @param string $name    parameter name
   * @param string $default value to use when the parameter is absent or is not a plain string
   * @return string
   */
  function tags_within_param($name, $default) {
    if (isset($_GET[$name])) {
      $value = $_GET[$name];
    } else if (isset($_POST[$name])) {
      $value = $_POST[$name];
    } else {
      return $default;
    }
    // Parameters sent as name[]=... arrive as arrays; treat those as absent.
    return is_string($value) ? $value : $default;
  }

  // Default target: this script's own address with any query string removed.
  $url = "http://" . $_SERVER['SERVER_NAME'] . ":" . $_SERVER['SERVER_PORT'] . str_replace("?" . $_SERVER['QUERY_STRING'], "", $_SERVER['REQUEST_URI']);

  // A requested url replaces the default only when it really is an http(s)
  // address, whether it came from the query string or from the posted form.
  $requested = trim(tags_within_param('url', ""));
  if (tags_within_is_http_url($requested)) {
    $url = $requested;
  }

  $ptag = tags_within_param('ptag', "");     // parent tag name, e.g. "body"
  $ctag = tags_within_param('ctag', "");     // child tag name, e.g. "form"
  $attrib = tags_within_param('attrib', ""); // optional text to look for in the parent tag
  $start = tags_within_param('start', "<");  // text that opens a tag
  $end = tags_within_param('end', "</");     // text that opens a closing tag
  
  // State for the optional "made up of" breakdown added to the last report line.
  // $makeuplist collects "size:count" entries separated (and started) by ";".
  // $toosimple starts out equal to $suffix; the report strips $toosimple from
  // $suffix before printing, so while the two are equal nothing extra is shown.
  $makeuplist = ";";
  $toosimple = " ... made up of <br>";
  $suffix = $toosimple;
  
  if ($url != "" && $ptag != "" && $ctag != "") {
    // Page head for the report view: doctype, the check() helper used by the
    // url field's onblur handler, and the opening body tag.
    echo '<!doctype HTML>
<html>
<head>
<script type="text/javascript">

  var which="url";
  var what="/youllneverfindthis/";

  
  function check(thisfordelim) {
    var outval = thisfordelim.value;
    if (outval.indexOf("http") != 0 && outval.indexOf("HTTP") != 0) outval = "http://";
    return outval;
  }


</script>
</head>
<body style=background-color:lightgray;>
';
    echo '<h1 align="center">Find Tags Within Tags With or Without Attributes</h1><br><br>' . "\n";

    // $url can come straight from the request, so it is HTML-escaped before it
    // is written into the heading; otherwise markup in the value would be
    // rendered by the browser.
    echo '<div align="center" style="background-color:yellow;border:15px green solid;"><br><h2>Report regarding ' . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ' ...</h2><br><p>' . "\n";
    // Fetch the page to report on. Only http:// and https:// addresses are
    // passed to file_get_contents(), so a request cannot make it open local
    // files or other stream wrappers.
    // NOTE(review): addresses of hosts reachable only from this server are still
    // fetched; restrict further if that matters for the deployment.
    $cont = false;
    if (preg_match('#^https?://#i', $url) === 1) {
      $cont = file_get_contents($url);
    }

    if ($cont === false) {
      echo "<br>Unable to read that url.<br>";
    } else if (strlen($cont) > 0) {
      // Filler written over tags that must stop matching. It is repeated or cut
      // to the length of the text it overwrites, so byte offsets stay aligned
      // with $cont.
      $mask = '@!#$%@#$!@#$%%%%%%;';

      // The closing bracket that pairs with the first character of $start.
      $startchar = substr($start, 0, 1);
      $endchar = strtr($startchar, "<[{(", ">]})");

      // Build $sparecont, a same-length copy of the page in which the closing
      // bracket is blanked to a space, so that "<body>" and "<body class=...>"
      // both contain the marker "<body ".
      if (substr($end, 0, 1) == "/") {
        // End delimiter such as "/>": protect it while the brackets are blanked,
        // then restore it. The end delimiter itself is the close marker.
        $pseudo_end = str_replace("/", "|", str_replace($endchar, "@", $end));
        $sparecont = str_replace($pseudo_end, $end, str_replace($endchar, " ", str_replace($end, $pseudo_end, $cont)));
        $epll = $end;
      } else {
        $sparecont = str_replace($endchar, " ", $cont);
        // NOTE(review): this close marker (e.g. "</body>") still ends with
        // $endchar, which has just been blanked out of $sparecont, so with the
        // default delimiters it never matches and each parent's span runs on to
        // the next parent start tag. Left as is because changing it alters every
        // count; confirm the intended behaviour.
        $epll = $end . strtolower($ptag) . $endchar;
      }

      $spll = $start . strtolower($ptag) . " "; // parent start marker, e.g. "<body "
      $scll = $start . strtolower($ctag) . " "; // child start marker, e.g. "<form "
      $showparent = htmlspecialchars($spll, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
      $showchild = htmlspecialchars($scll, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

      // Tag names are matched in any letter case: the parent is detected
      // case-insensitively and all counting is done on a lower-cased copy.
      if (stripos($sparecont, $spll) !== false) {
        $sparecont = strtolower($sparecont);
        $chunks = explode($spll, $sparecont);
        echo "<br>Total parent tag <b><i>" . $showparent . "</b></i> found is <b><i>" . (sizeof($chunks) - 1) . "</b></i><br>";

        if ($attrib != "" && (sizeof($chunks) - 1) >= 1) {
          // Keep only the parent tags whose own text contains $attrib; every
          // other parent start marker is overwritten with filler.
          $rcnt = 0;
          $lcnt = strlen($chunks[0]); // byte offset into $cont, kept in step with the chunks
          $newspare = $chunks[0];
          for ($ij = 1; $ij < sizeof($chunks); $ij++) {
            $lcnt += strlen($spll);
            // The tag carries no attributes when the character that ended its
            // name, or the one right after it, is the closing bracket.
            $bare = substr($cont, $lcnt - 1, 1) == $endchar || substr($cont, $lcnt, 1) == $endchar;
            // Attribute text is read from the untouched page, up to the tag's
            // own closing bracket.
            $tagtext = $bare ? "" : substr($cont, $lcnt, strcspn($cont, $endchar, $lcnt));
            if (!$bare && stripos($tagtext, $attrib) !== false) {
              $rcnt = $rcnt + 1;
              $newspare .= $spll;
            } else {
              $newspare .= str_pad("", strlen($spll), $mask);
              $nextis = strpos($chunks[$ij], $epll);
              if (strlen($epll) > strlen($spll) && $nextis !== false) {
                // Blank this parent's close marker as well.
                $chunks[$ij] = substr_replace($chunks[$ij], str_pad("", strlen($epll), $mask), $nextis, strlen($epll));
              }
            }
            $lcnt += strlen($chunks[$ij]);
            $newspare .= $chunks[$ij];
          }
          $sparecont = $newspare;
          echo "<br>Total relevant (ie. contains " . htmlspecialchars($attrib, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ") parent tag <b><i>" . $showparent . "</b></i> found is <b><i>" . $rcnt . "</b></i><br>";
        }

        echo "<br>Total child tag <b><i>" . $showchild . "</b></i> found is <b><i>" . (sizeof(explode($scll, $sparecont)) - 1) . "</b></i><br>";

        // Count the child tags that follow each remaining parent start marker.
        $found = 0;
        $chunks = explode($spll, $sparecont);
        for ($ij = 1; $ij < sizeof($chunks); $ij++) {
          // NOTE(review): the first close marker after each parent is blanked
          // before the span is cut at the next one, as the original code did;
          // the reason is not evident from this file.
          $nextis = strpos($chunks[$ij], $epll);
          if ($nextis !== false) {
            $chunks[$ij] = substr_replace($chunks[$ij], str_pad("", strlen($epll), $mask), $nextis, strlen($epll));
          }
          $span = explode($epll, $chunks[$ij]);
          $inthis = sizeof(explode($scll, $span[0])) - 1;

          // $makeuplist records how many parents held each child count, as
          // ";size:count;size:count;".
          $key = ";" . $inthis . ":";
          if (strpos($makeuplist, $key) !== false) {
            $fi = explode($key, $makeuplist);
            $fin = explode(";", $fi[1]);
            $makeuplist = str_replace($key . $fin[0] . ";", $key . ($fin[0] + 1) . ";", $makeuplist);
          } else if (strpos($makeuplist, ":") !== false) {
            // A second distinct size has appeared: from here on the breakdown is
            // worth printing, so stop $toosimple from cancelling $suffix.
            $toosimple = "youll_neverfindthis";
            $makeuplist .= $inthis . ":1;";
          } else {
            $makeuplist .= $inthis . ":1;";
          }
          if ($toosimple != $suffix) {
            $suffix = " ... made up of <br>" . str_replace(":", " count is ", str_replace(";", "<br> ... Groups of ", substr($makeuplist, 0, (strlen($makeuplist) - 1))));
          }
          $found += $inthis;
        }

        echo "<br>Total child tag <b><i>" . $showchild . "</b></i> found within parent tag <b><i>" . $showparent . "</b></i> is <b><i>" . $found . "</b></i>" . str_replace($toosimple, "", $suffix) . "<br>";
      }
    }

	// Close the report panel, then offer the search form again.
	echo '</p><hr></div>' . "\n";

    // This script's own address without the query string. REQUEST_URI comes
    // from the request line (and SERVER_NAME may reflect the Host header), so
    // the value is HTML-escaped before it is placed inside attributes.
    $selfurl = htmlspecialchars("http://" . $_SERVER['SERVER_NAME'] . ":" . $_SERVER['SERVER_PORT'] . str_replace("?" . $_SERVER['QUERY_STRING'], "", $_SERVER['REQUEST_URI']), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    echo '<div align="center" style="background-color:pink;border:5px green solid;"><br><form action="' . $selfurl . '" method="POST">' . "\n";
    echo 'Url: <input onblur=" this.value = check(this); " type="text" name="url" id="url" size=80 value="' . $selfurl . '"><br><br>' . "\n";
    echo 'Parent (in) tag: <input type="text" name="ptag" id="ptag" value="body"><br><br>' . "\n";
    echo 'Child (within) tag: <input type="text" name="ctag" id="ctag" value="form"><br><br>' . "\n";
    echo 'Parent Attribute search (optional): <input type="text" name="attrib" id="attrib" value=""><br><br>' . "\n";
    echo 'Start delimiter: <input type="text" name="start" id="start" value="&lt;"><br><br>' . "\n";
    echo 'End delimiter: <input type="text" name="end" id="end" value="&lt;/"><br><br><br>' . "\n";
    echo '<input type="submit" name="submit" id="submit" value="Report"><br><br>' . "\n";
    echo '</form></div>' . "\n";


  } else {
    // First visit (no url / parent tag / child tag supplied yet): print the page
    // head, including the check() helper used by the url field, and the form.
    echo '<!doctype HTML>
<html>
<head>
<script type="text/javascript">


  var which="url";
  var what="/youllneverfindthis/";

  
  function check(thisfordelim) {
    var outval = thisfordelim.value;
    if (outval.indexOf("http") != 0 && outval.indexOf("HTTP") != 0) outval = "http://";
    return outval;
  }


</script>
</head>
<body style=background-color:lightgray;>
';
    echo '<h1 align="center">Find Tags Within Tags With or Without Attributes</h1><br><br>' . "\n";

    // This script's own address without the query string. REQUEST_URI comes
    // from the request line (and SERVER_NAME may reflect the Host header), so
    // the value is HTML-escaped before it is placed inside attributes.
    $selfurl = htmlspecialchars("http://" . $_SERVER['SERVER_NAME'] . ":" . $_SERVER['SERVER_PORT'] . str_replace("?" . $_SERVER['QUERY_STRING'], "", $_SERVER['REQUEST_URI']), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    echo '<div align="center" style="background-color:pink;border:9px yellow solid;"><br><form action="' . $selfurl . '" method="POST">' . "\n";
    echo 'Url: <input onblur=" this.value = check(this); " type="text" name="url" id="url" size=80 value="' . $selfurl . '"><br><br>' . "\n";
    echo 'Parent (in) tag: <input type="text" name="ptag" id="ptag" value="body"><br><br>' . "\n";
    echo 'Child (within) tag: <input type="text" name="ctag" id="ctag" value="form"><br><br>' . "\n";
    echo 'Parent Attribute search (optional): <input type="text" name="attrib" id="attrib" value=""><br><br>' . "\n";
    echo 'Start delimiter: <input type="text" name="start" id="start" value="&lt;"><br><br>' . "\n";
    echo 'End delimiter: <input type="text" name="end" id="end" value="&lt;/"><br><br><br>' . "\n";
    echo '<input type="submit" name="submit" id="submit" value="Report"><br><br>' . "\n";
    echo '</form></div>' . "\n";
  }

?>
</body>
</html>

