summary refs log tree commit diff
path: root/includes/htmlparser/html_parser_inc.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/htmlparser/html_parser_inc.php')
-rwxr-xr-x includes/htmlparser/html_parser_inc.php 492
1 files changed, 492 insertions, 0 deletions
diff --git a/includes/htmlparser/html_parser_inc.php b/includes/htmlparser/html_parser_inc.php
new file mode 100755
index 0000000..5965711
--- /dev/null
+++ b/includes/htmlparser/html_parser_inc.php
@@ -0,0 +1,492 @@
+<?php
+if (!defined("_ECHOSERVER_HTML_PARSER")) {
+define("_ECHOSERVER_HTML_PARSER",1);
+
+class HtmlParser {
+ // Parser state. Kept as PHP 4 style `var` (public) declarations so that
+ // existing code reading these members directly keeps working.
+ var $pos,             // current read offset into $data
+     $tagpos,          // strlen($text) at the moment the current '<' was seen
+     $length,          // strlen($data)
+     $data,            // raw input being parsed
+     $stacktag,        // stack of entries ["tag" => &container, "num" => counter]
+     $stacktagpos,     // index of the top entry of $stacktag (-1 before Parse())
+     $name,            // name given by the caller, or the path of the file read
+     $quotstate,       // -1 outside a quoted value, 1 inside one
+     $quottype,        // quote character that opened the current value
+     $parname,         // name of the parameter currently being parsed
+     $pars,            // parameters collected for the current tag
+     $tagname,         // lower-cased name of the current tag
+     $content,         // root of the parsed tree; key "contentpos" = index of last node
+     $contentpos,      // declared but not used by the methods visible in this class
+     $allreadyparsed,  // 1 when the object was built from an already parsed array
+     $pg,              // reference to the grammar array, keyed by tag name
+     $dc,              // every character that delimits a word inside a tag
+     $nc,              // characters returned as one-character words: < > = /
+     $qc,              // quote characters
+     $sc,              // whitespace characters
+     $prevstate,       // last automaton state and word recorded by Parse()
+     $processtag,      // 1 while GetWord() is inside '<' ... '>'
+     $processpar,      // 1 after a parameter name has been read
+     $processparvalue, // 1 after '=' while the parameter value is expected
+     $c,               // reference to the container currently receiving nodes
+     $cp,              // reference to that container's "contentpos"
+     $text,            // raw text accumulated since the last emitted node
+     $incomment,       // set to 1 by GetWord() when it returns a comment
+     $skipto,          // tag name whose close tag ends skip mode ("" = not skipping)
+     $tagreg,          // per tag name counter of currently open tags
+     $wasquot,         // 1 when the word just read was introduced by a quote
+     $slevel,          // initialised to [0]; referenced by stacktag[0]["level"]
+     $slevelpos;       // initialised to 0 by the constructor
+/**********************************************************************************
+ * Class constructor
+ **********************************************************************************/
+ /**
+  * Initialise the parser state and load the input.
+  *
+  * @param mixed  $data     HTML source (when $datatype is false), a file path
+  *                         (when $datatype is true), or an already parsed
+  *                         content array.
+  * @param array  $grammar  Grammar description keyed by lower-case tag name.
+  * @param string $name     Name stored in $this->name for string input.
+  * @param int    $datatype 0: $data is the HTML itself; non-zero: $data is a
+  *                         file path to read.
+  */
+ function __construct($data,$grammar,$name="",$datatype=0) {
+     // Character classes used by GetWord().
+     $this->dc=[" ","\t","\r","\n","<",">",'"',"'","=","/"];
+     $this->nc=["<",">","=","/"];
+     $this->qc=['"',"'"];
+     $this->sc=["\r","\n"," ","\t"];
+     $this->prevstate=["state"=>0,"word"=>""];
+     $this->pg=&$grammar;
+     $this->pos=0;
+     $this->tagpos=0;
+     $this->length=0;
+     $this->data="";
+     $this->stacktag=[];
+     $this->stacktagpos=-1;
+     $this->content=[];
+     $this->content["contentpos"]=-1;
+     $this->c=&$this->content;
+     $this->cp=-1;
+     $this->quotstate=-1;
+     $this->allreadyparsed=0;
+     $this->text="";
+     $this->processtag=0;
+     $this->processpar=0;
+     $this->processparvalue=0;
+     $this->slevel=[0];
+     $this->slevelpos=0;
+     $this->quottype="";
+     $this->skipto="";
+     $this->incomment=0;
+     $this->tagreg=[];
+     $this->wasquot=0;
+     // AddNewTag() inspects $pars before Parse() resets it for the first time,
+     // so it must start out as an array.
+     $this->pars=[];
+     $this->tagname="";
+     $this->parname="";
+
+     // Already parsed tree: adopt it as the content. The check has to look at
+     // the argument; $this->data never holds the array at this point.
+     if (is_array($data)) {
+         $this->content=&$data;
+         $this->c=&$this->content;
+         $this->allreadyparsed=1;
+         return;
+     }
+     clearstatcache();
+     if (!$datatype) {
+         // $data is the HTML source itself.
+         $this->name=$name;
+         $this->data=(string)$data;
+         $this->length=strlen($this->data);
+         return;
+     }
+     // $data is a file path.
+     $this->name=$data;
+     $fp=fopen($this->name,"rb");
+     if (!$fp) {
+         // NOTE(review): SetError() is not defined in this class as visible here;
+         // presumably a subclass provides it. Fall back to a warning otherwise.
+         if (method_exists($this,"SetError"))
+             $this->SetError(1,"Can't open file $this->name.",0,0,"Error");
+         else
+             trigger_error("Can't open file $this->name.",E_USER_WARNING);
+         return;
+     }
+     flock($fp,LOCK_SH);
+     // stream_get_contents() copes with empty files, where
+     // fread($fp,filesize(...)) is called with a length of 0 and fails.
+     $contents=stream_get_contents($fp);
+     flock($fp,LOCK_UN);
+     fclose($fp);
+     $this->data=($contents===false) ? "" : $contents;
+     $this->length=strlen($this->data);
+ }
+
+ /**
+  * PHP 4 style constructor name, kept as a plain method so callers that
+  * invoke it explicitly (for example from a subclass) keep working. PHP 8 no
+  * longer treats a method named after the class as the constructor.
+  */
+ function HtmlParser($data,$grammar,$name="",$datatype=0) {
+     $this->__construct($data,$grammar,$name,$datatype);
+ }
+
+/********************************************************************************************
+ * Get word from data
+ ********************************************************************************************/
+ /**
+  * Read the next word from $this->data, starting at $this->pos.
+  *
+  * Outside a tag every byte is appended to $this->text until a '<' is met.
+  * Inside a tag a word is one of '<', '>', '=', '/', a whole comment
+  * ("<!-- ... -->"), or a run of characters ended by a delimiter from
+  * $this->dc (quotes around a parameter value are consumed but not returned).
+  * Consumed bytes other than comments are also appended to $this->text.
+  *
+  * @param string $word Receives the word; "" when the end of input is reached.
+  * @return bool false once the input is exhausted, true otherwise.
+  */
+ function GetWord(&$word) {
+     $word="";
+     $this->wasquot=0;
+     if ($this->pos>$this->length) return false;
+     while (1) {
+         if ($this->pos>$this->length) return false;
+         if ($this->pos==$this->length) {
+             // Report the end of input once (empty word); the next call
+             // returns false because pos is now past length.
+             $this->pos++;
+             return true;
+         }
+         if ($this->data[$this->pos]=="<") {
+             // Guard the look-ahead: '<' may be the very last byte.
+             if ($this->pos+1<$this->length && $this->data[$this->pos+1]=="!")
+                 if ($this->length>6 && $this->length-$this->pos+1>6) {
+                     if (substr($this->data,$this->pos,4)=="<!--") {
+                         $this->incomment=1;
+                         $end=strpos($this->data,"-->",$this->pos);
+                         if ($end!==false) {
+                             // Take the whole comment, including a terminator
+                             // that sits exactly at the end of the input.
+                             $word.=substr($this->data,$this->pos,$end+3-$this->pos);
+                             $this->pos=$end+3;
+                         } else {
+                             // Unterminated comment: consume everything except
+                             // the last three bytes, as before.
+                             $stop=$this->length-3;
+                             $word.=substr($this->data,$this->pos,$stop-$this->pos);
+                             $this->pos=$stop;
+                         }
+                         // Parse() clears $incomment after storing the comment.
+                         if ($this->incomment) break;
+                     }
+                 }
+         }
+         if (!$this->processtag) {
+             if ($this->data[$this->pos]=="<") {
+                 // A tag starts here; remember where it begins inside $text.
+                 $this->processtag=1;
+                 $this->tagpos=strlen($this->text);
+             } else {
+                 $this->text.=$this->data[$this->pos++];
+                 continue;
+             }
+         }
+         if (in_array($this->data[$this->pos],$this->dc)) {
+             // '<' or '>' ends an unquoted parameter value without being consumed.
+             if (($this->data[$this->pos]=="<" || $this->data[$this->pos]==">") && $this->quotstate==-1 && $this->processparvalue) {
+                 $this->processparvalue=0;
+                 return true;
+             }
+             // Whitespace outside quotes ends the current word, or is skipped
+             // when no word has been started yet.
+             if (in_array($this->data[$this->pos],$this->sc) && $this->quotstate==-1) {
+                 $this->text.=$this->data[$this->pos++];
+                 if (strlen($word)) {
+                     if ($this->processparvalue) $this->processparvalue=0;
+                     return true;
+                 }
+                 continue;
+             }
+             if (!strlen($word)) {
+                 if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
+                     if ($this->quotstate==-1) {
+                         // Opening quote of a parameter value.
+                         $this->wasquot=1;
+                         $this->quotstate*=-1;
+                         $this->quottype=$this->data[$this->pos];
+                         $this->text.=$this->data[$this->pos++];
+                         continue;
+                     } elseif ($this->quottype==$this->data[$this->pos]) {
+                         // Closing quote directly after the opening one: empty value.
+                         $this->quotstate*=-1;
+                         $this->quottype=$this->data[$this->pos];
+                         $this->processpar=$this->processparvalue=0;
+                         $this->text.=$this->data[$this->pos++];
+                         return true;
+                     }
+                 } elseif (in_array($this->data[$this->pos],$this->nc)) {
+                     // '<', '>', '=' or '/' on its own is a one-character word,
+                     // unless it is the first character of a parameter value.
+                     $word.=$this->data[$this->pos];
+                     $this->text.=$this->data[$this->pos++];
+                     if ($this->processparvalue)
+                         continue;
+
+                     return true;
+                 }
+             } else {
+                 if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
+                     if ($this->quotstate==1) {
+                         if ($this->data[$this->pos]==$this->quottype && $this->processparvalue) {
+                             // Closing quote of the value: consume it and return the value.
+                             $this->quotstate*=-1;
+                             $this->quottype=$this->data[$this->pos];
+                             $this->processpar=$this->processparvalue=0;
+                             $this->text.=$this->data[$this->pos++];
+                         } else {
+                             // Any other quote inside the value is part of the word.
+                             if ($this->data[$this->pos]==$this->quottype) {
+                                 $this->quotstate*=-1;
+                                 $this->quottype="";
+                             }
+                             $word.=$this->data[$this->pos];
+                             $this->text.=$this->data[$this->pos++];
+                             continue;
+                         }
+                     }
+                     return true;
+                 }
+                 if (in_array($this->data[$this->pos],$this->nc)) {
+                     if ($this->quotstate==-1) {
+                         if ($this->processparvalue) {
+                             // Unquoted value: '/' and '=' belong to it, '<' and '>' end it.
+                             if($this->data[$this->pos]!="/" && $this->data[$this->pos]!="=") return true;
+                             $word.=$this->data[$this->pos];
+                             $this->text.=$this->data[$this->pos++];
+                             continue;
+                         }
+                     } else {
+                         // Inside quotes everything is part of the value.
+                         $word.=$this->data[$this->pos];
+                         $this->text.=$this->data[$this->pos++];
+                         continue;
+                     }
+                     // The delimiter is left in place for the next call.
+                     return true;
+                 } elseif ($this->quotstate==-1 && $this->processparvalue && strlen($word)) {
+                     if ($this->data[$this->pos]==" ") {
+                         $this->text.=$this->data[$this->pos++];
+                         $this->processparvalue=0;
+                         return true;
+                     }
+                 }
+
+             }
+         }
+         // Ordinary character: append it to both the word and the raw text.
+         $word.=$this->data[$this->pos];
+         $this->text.=$this->data[$this->pos++];
+     }
+     return true;
+ }
+
+/********************************************************************************************
+ * Parse HTML code
+ ********************************************************************************************
+<tagname [parname=|parname=["|']parvalue["|']|parname][/]> |
+<[/]tagname>
+
+in/state 0 1 2 3 4 5 6 7 8
+< 1 -1 -1 -1 -1 -1 -1 -1 -1
+/ -1 7 6 6 6 6 -1 -1 -1
+= -1 -1 -1 4 -1 -1 -1 -1 -1
+> -1 -1 -2 -2 -2 -2 -2 -1 -3
+anyword -1 2 3 3 5 3 -1 8 -1
+
+-3 end parse close tag
+-2 end parse open tag
+-1 error
+ 0 begin parse
+ 1 got '<', waiting '/' or any word as tag name
+ 2 got any word as tagname, waiting '/' or '>' or any word as parameter name
+ 3 got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name
+ 4 got '=' waiting '/' or '>' or any word as parameter value
+ 5 got any word as parameter value, waiting '/' or '>' or any word as parameter name
+ 6 got '/' waiting '>'
+ 7 got '/', waiting any word as close tagname
+ 8 got any word as close tag name, waiting '>'
+ ********************************************************************************************/
+ /**
+  * Parse $this->data into the tree rooted at $this->content.
+  *
+  * Words delivered by GetWord() drive the state table documented in the
+  * comment above this method. A finished open or close tag is stored through
+  * AddNewTag(), the text collected in front of it through AddNewText().
+  * Returns nothing; does nothing when there is no input string.
+  */
+ function Parse() {
+     $automat=[
+ // states    0   1   2   3   4   5   6   7   8
+         "0"=>[ 1, -1, -1, -1, -1, -1, -1, -1, -1],// <
+         "1"=>[-1,  7,  6,  6,  6,  6, -1, -1, -1],// /
+         "2"=>[-1, -1, -1,  4, -1, -1, -1, -1, -1],// =
+         "3"=>[-1, -1, -2, -2, -2, -2, -2, -1, -3],// >
+         "4"=>[-1,  2,  3,  3,  5,  3, -1,  8, -1],// any word
+     ];
+     if (!is_string($this->data) || !strlen($this->data)) return;
+     // AddNewTag() looks at $pars before the first reset below.
+     if (!is_array($this->pars)) $this->pars=[];
+     $instates=["<"=>0,"/"=>1,"="=>2,">"=>3];
+     // Accepted tag and parameter names. ereg() was removed in PHP 7, so the
+     // same expression is used with PCRE; D keeps '$' strict at the end.
+     $namepattern='/^[a-zA-Z0-9!_-]+$/D';
+     $state=0;
+     $xmlclose=0;
+     $word="";
+     $this->c=&$this->content;
+     $this->cp=&$this->content["contentpos"];
+     // Entry 0 of the tag stack is the document root.
+     $this->stacktag[0]["tag"]=&$this->c;
+     $this->stacktag[0]["level"]=&$this->slevel;
+     $this->stacktag[0]["levelpos"]=0;
+     $this->stacktagpos=0;
+     while(1) {
+         if (!$this->GetWord($word)) break;
+         $w=strtolower($word);
+         if (!isset($instates[$w]))
+             $instate=4;
+         else
+             $instate=$instates[$w];
+         $state=$automat[$instate][$state];
+         // A '/' that was read as a quoted value is a value, not an XML close mark.
+         if ($this->wasquot && $state==6) $state=5;
+         switch($state) {
+             case -3:// end parse close tag
+                 // In skip mode only the close tag named in $skipto is honoured.
+                 if (strlen($this->skipto) && $this->tagname!=$this->skipto) {
+                     $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                     $this->pars=[];
+                     break;
+                 }
+                 $this->skipto="";
+                 $script=($this->tagname=="script") ? 1:0;
+                 $this->AddNewText(substr($this->text,0,$this->tagpos),$script);
+                 $this->AddNewTag(0);
+                 $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                 $this->quottype="";
+                 $this->quotstate=-1;
+                 $this->text="";
+                 $this->pars=[];
+                 $this->tagpos=0;
+                 break;
+             case -2:// end parse open tag
+                 if (strlen($this->skipto)) {
+                     $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                     $this->pars=[];
+                     break;
+                 }
+                 $this->AddNewText(substr($this->text,0,$this->tagpos));
+                 $this->AddNewTag(1,$xmlclose);
+                 $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                 $this->quottype="";
+                 $this->quotstate=-1;
+                 $this->text="";
+                 $this->pars=[];
+                 $this->tagpos=0;
+                 // Tags flagged "nohavetags" in the grammar switch on skip mode
+                 // until their own close tag is seen.
+                 if (isset($this->pg[$this->tagname]["nohavetags"]) && !strlen($this->skipto)) $this->skipto=$this->tagname;
+                 break;
+             case -1:// Error found
+                 $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                 $this->pars=[];
+                 if ($this->incomment) {
+                     // The word is a comment: flush pending text, then store it.
+                     if (strlen($this->text)) {
+                         $this->AddNewText($this->text);
+                         $this->text="";
+                         $this->tagpos=0;
+                     }
+                     $this->AddNewText($word,0,1);
+                     $this->incomment=0;
+                     break;
+                 }
+                 if ($word=="<") {
+                     // A '<' in an unexpected place starts a new tag at this position.
+                     $state=1;
+                     $this->processtag=1;
+                     $this->processparvalue=0;
+                     $this->tagpos=strlen($this->text)-1;
+                     $this->quottype="";
+                     $this->quotstate=-1;
+                 }
+                 break;
+             case 2:// got any word as tagname, waiting '/' or '>' or any word as parameter name
+                 $this->tagname=$w;
+                 $xmlclose=0;
+                 if (!preg_match($namepattern,$this->tagname) || strlen($this->skipto)) {
+                     $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                     $this->quottype="";
+                     $this->quotstate=-1;
+                     $this->pars=[];
+                     break;
+                 }
+                 break;
+             case 3:// got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name
+                 $this->parname=$w;
+                 if (!preg_match($namepattern,$this->parname) || strlen($this->skipto)) {
+                     $state=$this->processpar=$this->processparvalue=$this->processtag=0;
+                     $this->quottype="";
+                     $this->quotstate=-1;
+                     $this->pars=[];
+                     break;
+                 }
+                 $this->processpar=1;
+                 if ($w!="/") {
+                     // Marked as value-less until an '=' value arrives (case 5).
+                     $this->pars[$this->parname]["single"]=1;
+                 } else
+                     $xmlclose=1;
+                 break;
+             case 4:// got '=' waiting '/' or '>' or any word as parameter value
+                 $this->processparvalue=1;
+                 break;
+             case 5:// got any word as parameter value, waiting '/' or '>' or any word as parameter name
+                 if ($this->parname!="/") {
+                     unset($this->pars[$this->parname]["single"]);
+                     $this->pars[$this->parname]["value"]=$word;
+                     $this->pars[$this->parname]["quot"]=$this->quottype;
+                 }
+                 $this->quottype="";
+                 $this->processpar=$this->processparvalue=0;
+                 break;
+             case 6:// got '/' waiting '>'
+                 $xmlclose=1;
+                 break;
+             case 8:// got any word as close tag name, waiting '>'
+                 $this->tagname=$w;
+                 break;
+         }
+         // Same key the constructor initialises ("state").
+         $this->prevstate["state"]=$state;
+         $this->prevstate["word"]=$word;
+     }
+     // Trailing text after the last tag.
+     if (strlen($this->text)) $this->AddNewText($this->text);
+ }
+/********************************************************************************************
+ * Add new tag
+ ********************************************************************************************/
+ /**
+  * Store the tag described by $this->tagname / $this->pars in the tree.
+  *
+  * An open tag is appended to the current container. If the grammar says the
+  * tag has an end tag (and it is not self-closed) its "content" array becomes
+  * the new current container and an entry is pushed on the tag stack. A close
+  * tag with a matching stack entry is appended to that tag's content and the
+  * stack is unwound; otherwise it is appended to the current container.
+  *
+  * @param int $open     1 for an open tag, 0 for a close tag.
+  * @param int $xmlclose 1 when the open tag was written as <tag ... />.
+  */
+ function AddNewTag($open,$xmlclose=0) {
+     // The grammar is keyed by tag name, so this has to be a key lookup;
+     // in_array() searched the grammar's values and never matched.
+     $hasendtag=isset($this->pg[$this->tagname])
+         && (!isset($this->pg[$this->tagname]["endtag"]) || $this->pg[$this->tagname]["endtag"]!="absent");
+     $actionclose=0;
+     if (!$open && $hasendtag) $actionclose=1;
+
+     if ($open)
+         // Only the top stack entry is examined: the loop is left after the
+         // first pass, either by break or by forcing $i below zero.
+         for ($i=$this->stacktagpos;$i>0;$i--) {
+             $ct=&$this->stacktag[$i]["tag"];
+             $t=&$ct[$ct["contentpos"]];
+             $tagname=$t["data"]["name"];
+             if (isset($this->pg[$tagname]["closeon"])) {
+                 // The open tag on the stack is implicitly closed by this tag.
+                 if ((isset($this->pg[$tagname]["closeon"]["in"]) && sizeof($this->pg[$tagname]["closeon"]["in"]) && in_array($this->tagname,$this->pg[$tagname]["closeon"]["in"]))
+                     || (isset($this->pg[$tagname]["closeon"]["notin"]) && sizeof($this->pg[$tagname]["closeon"]["notin"]) && !in_array($this->tagname,$this->pg[$tagname]["closeon"]["notin"]))) {
+                     $actionclose=2;
+                     break;
+                 }
+             }
+             if ($actionclose!=2) $i=-1;
+         }
+
+     if ($actionclose) {
+         if ($actionclose==1) {
+             $i=$this->FindTag($this->tagname);
+             // Accept the match only for a real stack entry whose counter
+             // agrees. The root entry (index 0) carries no "num" and must never
+             // be popped.
+             if ($i>-1
+                 && (!isset($this->stacktag[$i]["num"])
+                     || !isset($this->tagreg[$this->tagname])
+                     || $this->tagreg[$this->tagname]!=$this->stacktag[$i]["num"]))
+                 $i=-1;
+         }
+         if ($i>-1) {
+             // Go back to the container that holds the matched open tag.
+             $this->c=&$this->stacktag[$i]["tag"];
+             $this->cp=&$this->c["contentpos"];
+             $this->stacktagpos=$i;
+             if ($actionclose==1) {
+                 // The close tag is stored as the last node of the tag's own content.
+                 $c=&$this->c[$this->c["contentpos"]]["content"];
+                 $cp=&$c["contentpos"];
+                 $cp++;
+                 $c[$cp]["type"]="tag";
+                 $c[$cp]["data"]["name"]=$this->tagname;
+                 $c[$cp]["data"]["type"]="close";
+                 if ($this->tagreg[$this->tagname])
+                     $this->tagreg[$this->tagname]--;
+                 $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname];
+                 $this->stacktagpos--;
+             }
+             // Drop every entry above the new top. Keys are collected first so
+             // that the loop bound does not shrink while entries are removed.
+             foreach (array_keys($this->stacktag) as $k)
+                 if ($k>$this->stacktagpos) unset($this->stacktag[$k]);
+             if ($actionclose==1) return;
+         }
+     }
+     $this->cp++;
+     $this->c[$this->cp]["type"]="tag";
+     $this->c[$this->cp]["data"]["name"]=$this->tagname;
+     $this->c[$this->cp]["data"]["type"]=($open) ? "open" : "close";
+     if (!$open)
+         if (isset($this->tagreg[$this->tagname]))
+             if ($this->tagreg[$this->tagname])
+                 $this->tagreg[$this->tagname]--;
+     if ($xmlclose) $this->c[$this->cp]["xmlclose"]=1;
+     // $pars may still be null for the very first tag; empty() accepts that.
+     if (!empty($this->pars)) $this->c[$this->cp]["pars"]=$this->pars;
+     if ($open && !$xmlclose && $hasendtag) {
+         // Descend: the new tag's content becomes the current container.
+         if (!isset($this->tagreg[$this->tagname])) $this->tagreg[$this->tagname]=0;
+         $this->tagreg[$this->tagname]++;
+         $this->stacktagpos++;
+         $this->stacktag[$this->stacktagpos]["tag"]=&$this->c;
+         $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname];
+         $this->c[$this->cp]["content"]=[];
+         $this->c[$this->cp]["content"]["contentpos"]=-1;
+         $this->c=&$this->c[$this->cp]["content"];
+         $this->cp=&$this->c["contentpos"];
+     }
+ }
+
+/********************************************************************************************
+ * Add new text
+ ********************************************************************************************/
+ /**
+  * Append a text or comment node to the current container and clear
+  * $this->text. Empty text is ignored and leaves all state untouched.
+  *
+  * @param string $text    Node content.
+  * @param int    $script  Non-zero: rewrite frame/window references in the
+  *                        text before storing it (Parse() passes 1 for the
+  *                        text in front of a closing script tag).
+  * @param int    $comment Non-zero: store the node as "comment", not "text".
+  */
+ function AddNewText($text,$script=0,$comment=0) {
+     if (!strlen($text)) return;
+     $this->cp++;
+     if (!$comment)
+         $this->c[$this->cp]["type"]="text";
+     else
+         $this->c[$this->cp]["type"]="comment";
+     if ($script) {
+         // The dots are escaped so that only the literal identifiers match; an
+         // unescaped '.' would also rewrite e.g. "topXlocationYhref".
+         $inputarray=['/_top/','/top\.location\.href/',"/([ \n]+)?window\.name/",'/parent\.location/'];
+         $replarray=["_echoserver_file_space","parent.frames('_echoserver_file_space').src","//window.name","parent.frames('_echoserver_file_space').src"];
+         $text=preg_replace($inputarray,$replarray,$text);
+     }
+     $this->c[$this->cp]["data"]=$text;
+     $this->text="";
+ }
+
+/********************************************************************************************
+ * Find first tag in stack
+ ********************************************************************************************/
+ /**
+  * Search the tag stack from the top down for an entry whose container's
+  * last node is a tag named $tagname.
+  *
+  * @param string $tagname Lower-case tag name to look for.
+  * @return int Stack index of the first match, or -1 when there is none.
+  */
+ function FindTag($tagname) {
+     for($i=$this->stacktagpos;$i>=0;$i--) {
+         if (!isset($this->stacktag[$i]["tag"]["contentpos"])) continue;
+         $last=$this->stacktag[$i]["tag"]["contentpos"];
+         // An empty container (contentpos -1) or a text node has no tag name.
+         if (!isset($this->stacktag[$i]["tag"][$last]["data"]["name"])) continue;
+         if ($this->stacktag[$i]["tag"][$last]["data"]["name"]==$tagname)
+             return $i;
+     }
+     return -1;
+ }
+}
+
+} //_ECHOSERVER_HTML_PARSER
+?>