<?php
// textofpdf.php
// RJM Programming
// July, 2018
// Thanks to https://stackoverflow.com/questions/3650957/how-to-extract-text-from-a-pdf#6
/**
 * Returns the request parameter $name as received by PHP, preferring the
 * query string over the POST body (same precedence as before).
 *
 * Non-string values (e.g. `pdf[]=x`) are treated as absent so that the
 * string functions applied afterwards never receive an array.
 *
 * @param string $name Parameter name to look up in $_GET, then $_POST.
 * @return string The parameter value, or "" when missing or not a string.
 */
function textofpdf_request_param($name)
{
    if (isset($_GET[$name])) {
        $value = $_GET[$name];
    } else if (isset($_POST[$name])) {
        $value = $_POST[$name];
    } else {
        return "";
    }
    return is_string($value) ? $value : "";
}

/**
 * Decodes the base64 payload of a data URL ("data:...;base64,<payload>").
 *
 * Everything up to and including the first comma is discarded. When there is
 * no comma the whole value is taken as the payload; previously strpos()
 * returning false made the offset 1 and silently dropped the first character.
 *
 * @param string $dataUrl Data URL or bare base64 text.
 * @return string Decoded bytes, or "" when nothing could be decoded.
 */
function textofpdf_decode_data_url($dataUrl)
{
    $comma = strpos($dataUrl, ",");
    $payload = ($comma === false) ? $dataUrl : substr($dataUrl, $comma + 1);
    $decoded = base64_decode((string) $payload);
    return ($decoded === false) ? "" : $decoded;
}

// A non-empty 'viafile' flag means 'pdf' carries a base64 data URL produced by
// the browser's FileReader; otherwise 'pdf' is a URL or a path.
// NOTE(review): PHP has already URL-decoded the superglobals once; the extra
// urldecode() is kept only for backward compatibility with callers that
// presumably double-encode -- confirm before removing.
$viafile = urldecode(textofpdf_request_param('viafile'));
$rawPdf = textofpdf_request_param('pdf');
if ($viafile === "") {
  $inpdf = urldecode($rawPdf);
} else {
  $inpdf = textofpdf_decode_data_url($rawPdf);
}
/**
 * Writes $pdfBytes to a private temporary file, streams the text that
 * Ghostscript's ps2ascii.ps extracts from it straight to the response, and
 * removes the temporary file again.
 *
 * A unique tempnam() file replaces the former fixed /tmp/textofpdf.in so that
 * concurrent requests cannot overwrite or delete each other's input.
 *
 * @param string $pdfBytes Raw PDF document contents.
 * @return void
 */
function textofpdf_passthru_bytes($pdfBytes)
{
    $tmpPath = tempnam(sys_get_temp_dir(), "textofpdf");
    if ($tmpPath === false) {
        echo "Unable to create a temporary file for the PDF.";
        return;
    }
    if (file_put_contents($tmpPath, $pdfBytes) === false) {
        echo "Unable to store the PDF for processing.";
    } else {
        // The PDF is fed on stdin ("-"); the path is shell-quoted defensively.
        passthru("gs -q -dNODISPLAY -dSAFER -dDELAYBIND -dWRITESYSTEMDICT -dSIMPLE  -f ps2ascii.ps -dQUIET - < " . escapeshellarg($tmpPath));
    }
    unlink($tmpPath);
}

// Dispatch on what the client sent:
//   nothing          -> show the upload / paste form
//   http(s) URL      -> download the PDF, then extract its text
//   'viafile' upload -> $inpdf already holds the decoded PDF bytes
//   anything else    -> treat $inpdf as a path to a PDF on this server
if ($inpdf == "") {
  echo "<!doctype html><html><head><title>Text of PDF</title><meta http-equiv='Content-Type' content='text/html; charset=UTF-8' /><script type='text/javascript'>

var w = null;
var done=false;
var okay=true;
  
function process(infilecontents) {
    if (infilecontents != null) {
        document.getElementById('viafile').value='y';
        document.getElementById('myform').enctype='multipart/form-data';
        if (1 == 2) document.getElementById('fsubmit').click();
        document.getElementById('fil').value='';
        done=false;
    }
}

function aalater() {
    readBlob(0,0); //document.getElementById('pbutton').click();
}

function alater() {
  if (!done) {
    var files = document.getElementById('file').files;
    if (files.length) {
      w = files[0].name;
      //alert(w);
      document.getElementById('fil').value = w;
    }
    if (document.getElementById('fil').value != '') {
      done=true;
      setTimeout(aalater, 5000);
    } else {
      setTimeout(alater, 1000);
    }
  }
}

function readBlob(opt_startByte, opt_stopByte) {

    var files = document.getElementById('file').files;
    if (!files.length) {
      alert('Please select a PDF file!');
      return;
    } else {
      w = files[0].name;
      //alert(w);
      document.getElementById('fil').value = w;
    }

    var file = files[0];
    var start = parseInt(opt_startByte) || 0;
    var stop = parseInt(opt_stopByte) || file.size - 1;

    var readertwo = new FileReader();
    var reader = new FileReader();

    // If we use onloadend, we need to check the readyState.
    reader.onloadend = function(evt) {
      if (evt.target.readyState == FileReader.DONE) { // DONE == 2
        if (okay) {
        document.getElementById('pdf').style.display='none';
        document.getElementById('pdf').innerHTML = evt.target.result;
        //alert('preclick ... ' + evt.target.result);
        //document.getElementById('submit').click();
        //cis.document.write(evt.target.result.replace(/</g, '&lt;').replace(/>/g, '&gt;'));
        //document.body.innerHTML = document.body.innerHTML + '<div id=be style=display:block;>' + evt.target.result.replace(/</g, '&lt;').replace(/>/g, '&gt;') + '</div>';
        //document.getElementById('byte_content').textContent = evt.target.result;
        //document.getElementById('byte_range').textContent = 
        //    ['Read bytes: ', start + 1, ' - ', stop + 1,
        //     ' of ', file.size, ' byte file'].join('');
        process(evt.target.result);
        } else {
        document.getElementById('fil').value='';
        done=false;
        alert('Not a PDF file!');
        okay=true;
        document.getElementById('pdf').style.display='block';
        location.href=document.URL;
        }
      }
    };

    readertwo.onloadend = function(evt) {
      if (evt.target.readyState == FileReader.DONE) { // DONE == 2
       if (('' + evt.target.result + '    ').substring(0,4) == '%PDF') {
        document.getElementById('pdf').style.display='none';
        document.getElementById('showpdf').style.display='block';
        document.getElementById('showpdf').innerHTML = evt.target.result;
       } else {
        okay=false;
       }
      }
    };

    var blob = file.slice(start, stop + 1);
    readertwo.readAsBinaryString(blob); //BinaryString(blob);
    reader.readAsDataURL(blob); //BinaryString(blob);
    
}

function alate() {
        document.getElementById('viafile').value='';
}

setTimeout(alater, 1000);
</script></head><body><h1>Text of PDF</h1><h3>RJM Programming - July, 2018</h3><table style='padding: 8 8 8 8; border:5px solid red;'><tr><td style='vertical-align:top;'>PDF Contents or URL or <input type='hidden' id='fil' name='fil' value='' /><input id='file' type='file' name='file'><span id='mode' name='mode'>&nbsp;&nbsp;</span><span class='readBytesButtons'><button style='display:none;' data-endbyte='4' data-startbyte='0'>1-5</button><button style='display:none;' data-endbyte='14' data-startbyte='5'>6-15</button><button style='display:none;' data-endbyte='7' data-startbyte='6'>7-8</button><button style='display:none;' id=pbutton onclick='readBlob(0,0);'>Process</button></span></td><td style='vertical-align:top;'><form id='myform' enctype='application/x-www-form-urlencoded' action='./textofpdf.php' method='POST'><input type=hidden name=viafile id=viafile value=''></input><textarea id=pdf name=pdf cols=80 rows=20 value=''></textarea><textarea style='display:none;' id=showpdf cols=80 rows=20 value=''></textarea></td><td style='vertical-align:top;'><input id=fsubmit style='background-color:yellow;' type=submit value='Extract Text of PDF'></input></form></td></tr></table></body></html>";
} else if (preg_match('#^https?://#i', $inpdf)) {
  // Only genuine http(s) URLs are fetched; the former "~http" substring test
  // also accepted values such as "httpfoo", which file_get_contents() would
  // have opened as a local path.
  // NOTE(review): this still downloads any URL the client names (including
  // internal hosts) -- restrict the allowed hosts if this is publicly exposed.
  // '+' becomes a space and https is downgraded to http, exactly as before.
  $url = str_replace(array('+', 'https:', 'Https:', 'HTTPS:'), array(' ', 'http:', 'http:', 'http:'), $inpdf);
  $pdfBytes = file_get_contents($url);
  if ($pdfBytes === false) {
    echo "Unable to retrieve a PDF from the supplied URL.";
  } else {
    textofpdf_passthru_bytes($pdfBytes);
  }
} else if ($viafile != "") {
  textofpdf_passthru_bytes($inpdf);
} else {
  // $inpdf names a file on this server.
  // NOTE(review): any path readable by the web server user can be requested
  // here -- consider confining it to a known directory.
  $pdfPath = str_replace('+', ' ', $inpdf);
  $firstChar = substr($pdfPath, 0, 1);
  if ($firstChar === '-' || $firstChar === '@') {
    // Ghostscript would read these as a switch or an argument file, not a PDF.
    echo "Invalid PDF file name.";
  } else {
    // escapeshellarg() stops quotes, backticks or $() in the value from
    // breaking out of the argument and running arbitrary shell commands.
    passthru("gs -q -dNODISPLAY -dSAFER -dDELAYBIND -dWRITESYSTEMDICT -dSIMPLE  -f ps2ascii.ps  " . escapeshellarg($pdfPath) . " -dQUIET -c quit");
  }
}
exit;
