summaryrefslogtreecommitdiff
path: root/includes/htmlparser/html_parser_inc.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/htmlparser/html_parser_inc.php')
-rw-r--r--includes/htmlparser/html_parser_inc.php492
1 files changed, 0 insertions, 492 deletions
diff --git a/includes/htmlparser/html_parser_inc.php b/includes/htmlparser/html_parser_inc.php
deleted file mode 100644
index 83b0f66..0000000
--- a/includes/htmlparser/html_parser_inc.php
+++ /dev/null
@@ -1,492 +0,0 @@
-<?php
-if (!defined("_ECHOSERVER_HTML_PARSER")) {
-define("_ECHOSERVER_HTML_PARSER",1);
-
-class HtmlParser {
- var $pos,
- $tagpos,
- $length,
- $data,
- $stacktag,
- $stacktagpos,
- $name,
- $quotstate,
- $quottype,
- $parname,
- $pars,
- $tagname,
- $content,
- $contentpos,
- $allreadyparsed,
- $pg,
- $dc,
- $nc,
- $qc,
- $prevstate,
- $processtag,
- $processpar,
- $processparvalue,
- $c,
- $cp,
- $text,
- $incomment,
- $skipto,
- $tagreg,
- $wasquot;
-/**********************************************************************************
- * Class constructor
- **********************************************************************************/
- function HtmlParser($data,$grammar,$name="",$datatype=0) {
- $this->dc=array(" ","\t","\r","\n","<",">","\"","'","=","/");
- $this->nc=array("<",">","=","/");
- $this->qc=array("\"","'");
- $this->sc=array("\r","\n"," ","\t");
- $this->prevstate=array("state"=>0,"word"=>"");
- $this->pg=&$grammar;
- $this->pos=0;
- $this->stacktag=array();
- $this->stacktagpos=-1;
- $this->content=array();
- $this->content["contentpos"]=-1;
- $this->c=&$this->content;
- $this->cp=-1;
- $this->quotstate=-1;
- $this->allreadyparsed=0;
- $this->text="";
- $this->processtag=0;
- $this->processpar=0;
- $this->processparvalue=0;
- $this->slevel=array(0);
- $this->slevelpos=0;
- $this->quottype="";
- $this->skipto="";
- $this->incomment=0;
- $this->tagreg=array();
- $this->wasquot=0;
-
- if(isset($this->data) && is_array($this->data)) {
- $this->content=&$data;
- $this->allreadyparsed=1;
- return;
- }
- clearstatcache();
- $this->name=$data;
- if (!$datatype) {
- $this->name=$name;
- $this->data=$data;
- $this->length=strlen($this->data);
- return;
- }
- if (!$fp=fopen($this->name,"rb")) {
- $this->SetError(1,"Can't open file $this->name.",0,0,"Error");
- return;
- }
- flock($fp,1);
- $this->data=fread($fp,filesize($this->name));
- flock($fp,3);
- fclose($fp);
- $this->length=strlen($this->data);
- }
-
-/********************************************************************************************
- * Get word from data
- ********************************************************************************************/
- function GetWord(&$word) {
- $word="";
- $this->wasquot=0;
- if ($this->pos>$this->length) return false;
- while (1) {
- if ($this->pos>$this->length) return false;
- if ($this->pos==$this->length) {
- $this->pos++;
- return true;
- }
- if ($this->data[$this->pos]=="<") {
- if ($this->data[$this->pos+1]=="!")
- if ($this->length>6 && $this->length-$this->pos+1>6) {
- if (substr($this->data,$this->pos,4)=="<!--") {
- $this->incomment=1;
- while($this->pos<$this->length-3) {
- if (substr($this->data,$this->pos,3)=="-->") {
- $word.="-->";
- $this->pos+=3;
- break;
- } else
- $word.=$this->data[$this->pos++];
- }
- if ($this->incomment) break;
- }
- }
- }
- if (!$this->processtag) {
- if ($this->data[$this->pos]=="<") {
- $this->processtag=1;
- $this->tagpos=strlen($this->text);
- } else {
- $this->text.=$this->data[$this->pos++];
- continue;
- }
- }
- if (in_array($this->data[$this->pos],$this->dc)) {
- if (($this->data[$this->pos]=="<" || $this->data[$this->pos]==">") && $this->quotstate==-1 && $this->processparvalue) {
- $this->processparvalue=0;
- return true;
- }
- if (in_array($this->data[$this->pos],$this->sc) && $this->quotstate==-1) {
- $this->text.=$this->data[$this->pos++];
- if (strlen($word)) {
- if ($this->processparvalue) $this->processparvalue=0;
- return true;
- } else
- continue;
- }
- if (!strlen($word)) {
- if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
- if ($this->quotstate==-1) {
- $this->wasquot=1;
- $this->quotstate*=-1;
- $this->quottype=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- continue;
- } elseif ($this->quottype==$this->data[$this->pos]) {
- $this->quotstate*=-1;
- $this->quottype=$this->data[$this->pos];
- $this->processpar=$this->processparvalue=0;
- $this->text.=$this->data[$this->pos++];
- return true;
- }
- } elseif (in_array($this->data[$this->pos],$this->nc)) {
- $word.=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- if ($this->processparvalue)
- continue;
- else
- return true;
- }
- } else {
- if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
- if ($this->quotstate==1) {
- if ($this->data[$this->pos]==$this->quottype && $this->processparvalue) {
- $this->quotstate*=-1;
- $this->quottype=$this->data[$this->pos];
- $this->processpar=$this->processparvalue=0;
- $this->text.=$this->data[$this->pos++];
-// continue;
- } else {
- if ($this->data[$this->pos]==$this->quottype) {
- $this->quotstate*=-1;
- $this->quottype="";
- }
- $word.=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- continue;
- }
- }
- return true;
- } else {
- if (in_array($this->data[$this->pos],$this->nc)) {
- if ($this->quotstate==-1) {
- if ($this->processparvalue) {
- if($this->data[$this->pos]!="/" && $this->data[$this->pos]!="=") return true;
- $word.=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- continue;
- }
- } else {
- $word.=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- continue;
- }
- return true;
- } elseif ($this->quotstate==-1 && $this->processparvalue && strlen($word)) {
- if ($this->data[$this->pos]==" ") {
- $this->text.=$this->data[$this->pos++];
- $this->processparvalue=0;
- return true;
- }
- }
- }
- }
- }
- $word.=$this->data[$this->pos];
- $this->text.=$this->data[$this->pos++];
- }
- return true;
- }
-
-/********************************************************************************************
- * Parse HTML code
- ********************************************************************************************
-<tagname [parname=|parnane=["|']parvalue["|']|parname][/]> |
-<[/]tagname>
-
-in/state 0 1 2 3 4 5 6 7 8
-< 1 -1 -1 -1 -1 -1 -1 -1 -1
-/ -1 7 6 6 6 6 -1 -1 -1
-= -1 -1 -1 4 -1 -1 -1 -1 -1
-> -1 -1 -2 -2 -2 -2 -2 -1 -3
-anyword -1 2 3 3 5 3 -1 8 -1
-
--3 end parse close tag
--2 end parse open tag
--1 error
- 0 begin parse
- 1 got '<', waiting '/' or any word as tag name
- 2 got any word as tagname, waiting '/' or '>' or any word as parameter name
- 3 got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name
- 4 got '=' waiting '/' or '>' or any word as parameter value
- 5 got any word as parameter value, waiting '/' or '>' or any word as parameter name
- 6 got '/' waiting '>'
- 7 got '/', waiting any word as close tagname
- 8 got any word as close tag name, waiting '>'
- ********************************************************************************************/
- function Parse() {
- $automat=array(
-// states 0 1 2 3 4 5 6 7 8
- "0"=>array( 1, -1, -1, -1, -1, -1, -1, -1, -1),// <
- "1"=>array(-1, 7, 6, 6, 6, 6, -1, -1, -1),// /
- "2"=>array(-1, -1, -1, 4, -1, -1, -1, -1, -1),// =
- "3"=>array(-1, -1, -2, -2, -2, -2, -2, -1, -3),// >
- "4"=>array(-1, 2, 3, 3, 5, 3, -1, 8, -1) // any word
- );
- if (!strlen($this->data)) return;
- $instates=array("<"=>0,"/"=>1,"="=>2,">"=>3);
- $parcount=0;
- $state=0;
- $this->c=&$this->content;
- $this->cp=&$this->content["contentpos"];
- $this->stacktag[0]["tag"]=&$this->c;
- $this->stacktag[0]["level"]=&$this->slevel;
- $this->stacktag[0]["levelpos"]=0;
- $this->stacktagpos=0;
- while(1) {
- if (!$isword=$this->GetWord($word)) break;
- $w=strtolower($word);
- if (!isset($instates[$w]))
- $instate=4;
- else
- $instate=$instates[$w];
-//print htmlspecialchars($word).",$state,$instate,$this->quottype<br>";
- $state=$automat[$instate][$state];
- if ($this->wasquot && $state==6) $state=5;
-//print htmlspecialchars($word).",$state<br>";
- switch($state) {
- case -3:// end parse close tag
- if (strlen($this->skipto) && $this->tagname!=$this->skipto) {
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->pars=array();
- break;
- } else
- $this->skipto="";
- $script=($this->tagname=="script") ? 1:0;
- $this->AddNewText(substr($this->text,0,$this->tagpos),$script);
- $this->AddNewTag(0);
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->quottype="";
- $this->quotstate=-1;
- $this->text="";
- $this->pars=array();
- $this->tagpos=0;
- break;
- case -2:// end parse open tag
- if (strlen($this->skipto)) {
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->pars=array();
- break;
- }
- $this->AddNewText(substr($this->text,0,$this->tagpos));
- $this->AddNewTag(1,$xmlclose);
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->quottype="";
- $this->quotstate=-1;
- $this->text="";
- $this->pars=array();
- $this->tagpos=0;
- if (isset($this->pg[$this->tagname]["nohavetags"]) && !strlen($this->skipto)) $this->skipto=$this->tagname;
- break;
- case -1:// Error found
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->pars=array();
- if ($this->incomment) {
- if (strlen($this->text)) {
- $this->AddNewText($this->text);
- $this->text="";
- $this->tagpos=0;
- }
- $this->AddNewText($word,0,1);
- $this->incomment=0;
- break;
- }
- if ($word=="<") {
- $state=1;
- $this->processtag=1;
- $this->processparvalue=0;
- $this->tagpos=strlen($this->text)-1;
- $this->quottype="";
- $this->quotstate=-1;
- }
- break;
- case 2:// got any word as tagname, waiting '/' or '>' or any word as parameter name
- $this->tagname=$w;
- $xmlclose=0;
- if (!ereg("^[a-zA-Z0-9!_-]+$",$this->tagname) || strlen($this->skipto)) {
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->quottype="";
- $this->quotstate=-1;
- $this->pars=array();
- break;
- }
- break;
- case 3:// got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name
- $this->parname=$w;
- if (!ereg("^[a-zA-Z0-9!_-]+$",$this->parname) || strlen($this->skipto)) {
- $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
- $this->quottype="";
- $this->quotstate=-1;
- $this->pars=array();
- break;
- }
- $this->processpar=1;
- if ($w!="/") {
- $parcount++;
- $this->pars[$this->parname]["single"]=1;
- } else
- $xmlclose=1;
- break;
- case 4:// got '=' waiting '/' or '>' or any word as parameter value
- $this->processparvalue=1;
- break;
- case 5:// got any word as parameter value, waiting '/' or '>' or any word as parameter name
- if ($this->parname!="/") {
- unset($this->pars[$this->parname]["single"]);
- $this->pars[$this->parname]["value"]=$word;
- $this->pars[$this->parname]["quot"]=$this->quottype;
- }
- $this->quottype="";
- $this->processpar=$this->processparvalue=0;
- break;
- case 6:// got '/' waiting '>'
- $xmlclose=1;
- break;
- case 8:// got any word as close tag name, waiting '>'
- $this->tagname=$w;
- break;
- }
- $this->prevstate["states"]=$state;
- $this->prevstate["word"]=$word;
- }
- if (strlen($this->text)) $this->AddNewText($this->text);
- }
-/********************************************************************************************
- * Add new tag
- ********************************************************************************************/
- function AddNewTag($open,$xmlclose=0) {
- $actionclose=0;
- if (!$open && in_array( $this->tagname, $this->pg ) && $this->pg[$this->tagname]["endtag"]!="absent") $actionclose=1;
-
- if ($open)
- for ($i=$this->stacktagpos;$i>0;$i--) {
- $ct=&$this->stacktag[$i]["tag"];
- $t=&$ct[$ct["contentpos"]];
- $tagname=$t["data"]["name"];
- if (isset($this->pg[$tagname]["closeon"])) {
- if (isset($this->pg[$tagname]["closeon"]["in"]) && sizeof($this->pg[$tagname]["closeon"]["in"]) && in_array($this->tagname,$this->pg[$tagname]["closeon"]["in"])
- || isset($this->pg[$tagname]["closeon"]["notin"]) && sizeof($this->pg[$tagname]["closeon"]["notin"]) && !in_array($this->tagname,$this->pg[$tagname]["closeon"]["notin"])) {
- $actionclose=2;
- break;
- }
- }
- if ($actionclose!=2) $i=-1;
- }
-
- if ($actionclose) {
- if ($actionclose==1) {
- $i=$this->FindTag($this->tagname);
- if ($i>-1)
- if ($this->tagreg[$this->tagname]!=$this->stacktag[$i]["num"])
- $i=-1;
- }
- if ($i>-1) {
- $this->c=&$this->stacktag[$i]["tag"];
- $this->cp=&$this->c["contentpos"];
- $this->stacktagpos=$i;
- if ($actionclose==1) {
- $c=&$this->c[$this->c["contentpos"]]["content"];
- $cp=&$this->c[$this->c["contentpos"]]["content"]["contentpos"];
- $cp++;
- $c[$cp]["type"]="tag";
- $c[$cp]["data"]["name"]=$this->tagname;
- $c[$cp]["data"]["type"]="close";
- if (isset($this->tagreg[$this->tagname]))
- if ($this->tagreg[$this->tagname])
- $this->tagreg[$this->tagname]--;
- $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname];
- $this->stacktagpos--;
- }
- if ($this->stacktagpos<sizeof($this->stacktag))
- for ($i=$this->stacktagpos+1;$i<sizeof($this->stacktag);$i++)
- unset($this->stacktag[$i]);
- if ($actionclose==1) return;
- }
- }
- $this->cp++;
- $this->c[$this->cp]["type"]="tag";
- $this->c[$this->cp]["data"]["name"]=$this->tagname;
- $this->c[$this->cp]["data"]["type"]=($open) ? "open" : "close";
- if (!$open)
- if (isset($this->tagreg[$this->tagname]))
- if ($this->tagreg[$this->tagname])
- $this->tagreg[$this->tagname]--;
- if ($xmlclose) $this->c[$this->cp]["xmlclose"]=1;
- if (sizeof($this->pars)) $this->c[$this->cp]["pars"]=$this->pars;
- if ($open && !$xmlclose && in_array( $this->tagname, $this->pg ) && $this->pg[$this->tagname]["endtag"]!="absent") {
- if (!isset($this->tagreg[$this->tagname])) $this->tagreg[$this->tagname]=0;
- $this->tagreg[$this->tagname]++;
- $this->stacktagpos++;
- $this->stacktag[$this->stacktagpos]["tag"]=&$this->c;
- $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname];
- $this->c[$this->cp]["content"]=array();
- $this->c[$this->cp]["content"]["contentpos"]=-1;
- $this->c=&$this->c[$this->cp]["content"];
- $this->cp=&$this->c["contentpos"];
- }
- }
-
-/********************************************************************************************
- * Add new text
- ********************************************************************************************/
- function AddNewText($text,$script=0,$comment=0) {
- if (!strlen($text)) return;
- $this->cp++;
- if (!$comment)
- $this->c[$this->cp]["type"]="text";
- else
- $this->c[$this->cp]["type"]="comment";
- if ($script) {
- $inputarray=array("/_top/","/top.location.href/","/([ \n]+)?window\.name/","/parent.location/");
- $replarray=array("_echoserver_file_space","parent.frames('_echoserver_file_space').src","//window.name","parent.frames('_echoserver_file_space').src");
-/*
- $text=str_replace("_top","_echoserver_file_space",$text);
- $text=str_replace("top.location.href","parent.frames('_echoserver_file_space').src",$text);
- $text=preg_replace("/([ \n]+)?window\.name/","//window.name",$text);
-*/
- $text=preg_replace($inputarray,$replarray,$text);
-
- }
- $this->c[$this->cp]["data"]=$text;
- $this->text="";
- }
-
-/********************************************************************************************
- * Find first tag in stack
- ********************************************************************************************/
- function FindTag($tagname) {
- for($i=$this->stacktagpos;$i>=0;$i--)
- if ($this->stacktag[$i]["tag"][$this->stacktag[$i]["tag"]["contentpos"]]["data"]["name"]==$tagname)
- return $i;
- return -1;
- }
-}
-
-} //_ECHOSERVER_HTML_PARSER
-?>