/*HTMLSYN -- syntax checker for some pairings in HTML*/ /* David McRitchie -- "The REXX Macros Toolbox" -- 1995/05/25 */ /* http://www.geocities.com/davemcritchie/ */ /*much better check for HTML than EDITPR and PLICHK */ /*recommend QCHK also be used, and finally a real HTML checker*/ Address "ISREDIT";"MACRO" /* since all are tags -- save time later using FIND NX with bounds*/ "(dataset) = dataset" "(member) = member" arg leftside rightside if "FORCE" /= translate(leftside) then do ext = right(dataset,4) if ext /= "HTML" & ext /= ".HTM" & member /= "HTMLSYN.txt" then do; "x 1 p'=' all"; "line_after 0 = noteline """||, ext "is not an HTML extension, SYNTAX CHECKING TERMINATED.""" exit 4 end end errline = "" "x 1 p'=' all";"find '<' all";"find '>' all" bound1=0; bound2=0; col1=0; row1=0; rowa=0; colb=0; row2=0; col2=0; "bounds" "(bound1,bound2) = bounds" say bound1 bound2 dataset if bound2 > 100 then do "find first p'^' 101" bound2 if rc \= 0 then "bound 1 100" end "(bound1a,bound2a) = bounds" "line_before .zfirst = noteline", """HTMLSYN" time('l') time('r') left(' ',6) "bounds" bound1 bound2, "(at start)" bound1a bound2a """" "seek all '""'" "(seekcnt) = seek_counts";seekcnt=seekcnt+0 if trunc(seekcnt/2) \= seekcnt/2 then "line_before .zfirst = msgline ""Double quotes found" seekcnt, "indicates at least one unpaired dquote (QCHK may help)""" symbols = " AElig Aacute Acirc Agrave Aring Atilde Auml", "Ccedil ETH Eacute Ecirc Egrave Euml Iacute Icirc Igrave", "Iuml Ntilde Oacute Ocirc Ograve Oslash Otilde Ouml THORN", "Uacute Ucirc Ugrave Uuml Yacute aacute acirc aelig", "agrave aring atilde auml ccedil eacute ecirc egrave eth", "euml iacute icirc igrave iuml ntilde oacute ocirc ograve", "oslash otilde ouml szlig thorn uacute ucirc ugrave uuml", "yacute yuml" /* see symbols in a table at ...*/ /* http://www.geocities.com/davemcritchie/htm/symbols.htm */ do ttt = 1 to 1; "find first ''" if rc \= 0 then leave ttt "(row1,col1) = cursor" "(line) = line" row1 "find first 
''" if rc \= 0 then leave ttt "(row2,col2) = cursor" if row1 = row2 then title=substr(line,col1+7,col2-col1-7) else do title = substr(line,col1+7) do i = row1+1 to row2 if row1 <> row2 then title = title substr(line,1,col2-1) else title = title line end end l=0 + length(title) if l > 65 then "line_after .zcsr = msgline ""Standards suggest a maximum", "of 64 bytes in title --" length(title) """" end; /* ttt */ call badamp call misuse "href=http:","missing double quotes after href= when", "fixed try editpr ="" "">" call misuse "href='","single quote after href= should be double quote" call misuse "color=#","double quotes should enclose value for COLOR=" call misuse "bgcolor=#","double quotes should enclose value for BGCOLOR=" call misuse "width=0","double quotes should enclose value for WIDTH=" call misuse "width=1","double quotes should enclose value for WIDTH=" call misuse "width=2","double quotes should enclose value for WIDTH=" call misuse "width=3","double quotes should enclose value for WIDTH=" call misuse "width=4","double quotes should enclose value for WIDTH=" call misuse "width=5","double quotes should enclose value for WIDTH=" call misuse "width=6","double quotes should enclose value for WIDTH=" call misuse "width=7","double quotes should enclose value for WIDTH=" call misuse "width=8","double quotes should enclose value for WIDTH=" call misuse "width=9","double quotes should enclose value for WIDTH=" call misuse "dxxd","unchanged original skeleton code remaining" call misuse "'>","single quote before > should be double quote" call misuse "//lycos","possible misuse s/b", "like http://lycos.cs.cmu.edu or", "query6.lycos.cs.edu/lycos-form.html" /* has not really been dropped but validators are still flagging*/ /*--call f curs "&"","" -- accidentally dropped from HTML", "3.2 lang -- use "" --*/ call occurs "& ."," . -- needs semi-colon not period." 
call occurs "&"e",""e -- drop the e -- should be "" call occurs "&  ","  -- needs semi-colon after  " call occurs "&" ","" -- needs semi-color after "" call occurs "ÿ", "hex FF -- found in coding, probably want blanks" "seek last """"" if rc = 0 then do "seek next p'=' 1" if rc = 0 then do "line_before .zcsr = noteline ""Extra lines after """ "(row1,col1) = CURSOR" errline = errline||row1" " row1 = row1 - 1 errline = errline||row1" " end end arg leftside rightside if leftside \="" then if rightside \="" then call inner else if "Q" = translate(leftside) then signal fastQ; leftside = "<" ignore = "ZZZ" rightside = ">" call inner fastq: leftside = " (--works okay if you have Netscape or MS Explorer, but not legal--) doesn't use double quotes -- messes up and */ /* leftside = '="'; rightside = '">'; call inner */ tags = '

   <table <title>',
 '<tr  <td  <th  <em>',
 '<strong> <code> <samp> <kbd> <var> <cite> <dfn> <strike> <u>'
/* For every opening tag listed in tags, derive the matching      */
/* closing tag and let inner verify that the two are paired.      */
do n = 1 to words(tags)
   leftside = word(tags, n)
   nword = leftside
   /* drop the leading "<" and prefix "</" to form the closer */
   rightside = '</' || substr(leftside, 2)
   /* entries written without a trailing ">" still need one on the closer */
   if right(rightside, 1) \= '>' then rightside = rightside || '>'
   call inner
end

nestings = 'dir menu ol ul dl'; /* htmlol should include all*/
/* li should only exist within nestings -- use htmlol to test */

/* Report any of the structural tags that cannot be found anywhere */
/* in the data.  Each word of important is searched for exactly as */
/* written (it already carries its own "<"), so no extra brackets  */
/* are added around it.                                            */
important = '<html> <head> <title> <body'
missing  = ' '
do n = 1 to words(important)
   leftside = word(important,n)
   nword = leftside
   "find first '"leftside"'"
   if rc \= 0 then missing = missing leftside
end
/* non-strict comparison: a value made only of blanks equals "" */
if missing \= "" then
   "line_before .zf = msgline",
      """...important tags missing: " missing""""

/* Look for containers whose opening tag is followed directly by   */
/* their own closing tag.  Every hit gets a note line; an empty TD */
/* is additionally repaired by inserting &nbsp; before the closer. */
containers = translate("td blockquote tr")
cFixes = 0
say "Now checking for empty containers:" containers
/* <td></td>  should be replaced with <td>&nbsp;</td> */
do n = 1 to words(containers)
   /* go back to the top of the data; nothing to do if that fails */
   "find first p'='"
   if rc = 0 then do
      nword = word(containers, n)
      leftside = '<' || nword
      do forever
         /* opening tag */
         "find next '"leftside"'"
         if rc /= 0 then leave
         "(row1,col1) = cursor"
         /* end of the opening tag */
         "find next '>'"
         if rc /= 0 then leave
         /* first non-blank character that follows */
         "find next p'^'"
         if rc /= 0 then leave
         "(row2,col2) = cursor"
         "(line) = line" row2
         uline = translate(line)
         if substr(uline, col2, length(nword) + 2) = "</" || nword then do
            noteline = "Empty container for" nword "between row" row1 col1 "and row" row2 col2
            "line_after" row1 "= noteline (noteline)"
            errline = errline || row1 || " "
            if row1 /= row2 then errline = errline || row2 || " "
            if nword = "TD" then do
               /* keep a copy of the old line as a note, then patch the data */
               cFixes = cFixes + 1
               "line_before" row2 "= msgline ""*** inserted &&nbsp; ***"""
               "line_before" row2 "= noteline (line)"
               line = substr(line, 1, col2 - 1) || "&nbsp;" || substr(line, col2)
               "line" row2 "= (line)"
            end
         end
         /* resume the search from the opening tag just examined */
         "cursor =" row1 col1
      end
   end
end
/* Summarise the TD repairs, if any were made */
if cFixes /= 0 then do
   text = cFixes " -- TD container errors were corrected"
   say text
   "line_before .zfirst = msgline (text)"
end

/* Completion note carrying the time of day and the elapsed timer */
donemsg = """HTMLSYN" time('l') "finished in" time('r') "seconds"""
"line_before .zfirst = noteline" donemsg
"loc 0"
"locate .zf .zl special"
"up 3"

/* thought of something else for one-time test*/
arg leftside rightside
if leftside \= "" then
   if rightside \= "" then call inner

/* put back the bounds that were saved in bound1 and bound2 */
"bounds" bound1 bound2
"x 1 p'=' all"   /* exclude all non Special lines */
say "a-done"
/* redisplay every row that was recorded in errline */
rest = errline
do while rest \= ""
   parse var rest rownum rest
   "xstatus" rownum "= NX"
end
"loc 0"
"locate .zf .zl special"
"up 3"
return

/************************************************************/
/* inner -- check that every leftside string is followed by a       */
/*   rightside string before the next leftside, looking only at     */
/*   non-excluded (NX) lines.                                       */
/* Reads the shared variables leftside, rightside and ignore.       */
/* Problems are reported through note2, note2n and note1n or by an  */
/* inline msgline; affected rows are appended to errline.           */
/* ignore is copied to ignorex and reset to "ZZZ" on entry; the     */
/* value "ZZZ" is treated below as nothing to ignore.               */
inner:
ignorex = ignore; ignore="ZZZ"
say 'Now checking -- ' leftside ' ... ' rightside ' -- ' ignorex
/* row2/col2 hold the position from which the next search starts */
row2 = 1; col2 = 0
/* n1 = 0 means no leftside position is carried over in nr1/nc1 */
n1 = 0
"cursor =" row2 col2
/* the fixed iteration count is a guard against looping forever */
DO I = 1 TO 8000
   if i > 7995 then say 'approaching loop prevention limit of 8000'
   "cursor =" row2 col2
   if n1 = 0 then do
     /* nothing carried over: search for the next leftside */
     "find next nx '"leftside"'"
     if rc \= 0 then do forever
           /* --- show all after last leftside found */
           /* every rightside still found is reported by note2n; */
           /* the outer loop ends when none is left              */
           "find next nx '"rightside"'"
           if rc \= 0 then leave i
           "(row2,col2) = cursor"
           call note2n /* eof condition*/
     end
     "(row1,col1) = cursor"
     if ignorex <> "ZZZ" then do
        "(line) = line" row1
        line=translate(line)
        /* text at the found position starts with ignorex: skip it */
        /* and continue the search one column further on           */
        if ignorex = substr(line,col1,length(ignorex)) then do
           say row1 '---ccc ignore' row1 '--' ignorex
           row2 = row1; col2 = col1 + 1; iterate i
        end
     end
   end
   else do
     /* reuse the leftside position found by the look-ahead at the */
     /* bottom of the previous iteration                           */
     row1 = nr1; col1 = nc1; n1=0;
     /* NOTE(review): the line is fetched here but, unlike the     */
     /* branch above, not compared with ignorex -- confirm intent  */
     if ignorex <> "ZZZ" then do
        "(line) = line" row1
        line=translate(line)
     end
   end;
   /* search for the rightside starting from row2/col2 */
   "cursor =" row2 col2
   "find next nx  '"rightside"'"
   if rc \= 0 then do
      /* no rightside found: report this leftside, then continue   */
      /* searching from its position                               */
      "(row2,col2) = cursor"
      notex = leftside "at" row1 col1 "not terminated with/include",
           rightside "before end of file"
      "line_after" row1 "= msgline (notex)"
      errline = errline||row1" "
      row2 = row1; col2=col1;
      iterate i
   end
   "(row2,col2) = cursor"
   /* fold row and column into one number so positions can be      */
   /* compared; NOTE(review): assumes columns stay below 10000     */
   c1 = 10000 * row1 + col1 + 100000
   c2 = 10000 * row2 + col2 + 100000
   /* rightside lies before the leftside: reported by note2 */
   if c2 < c1 then do; call note2; end;
   if c1 < c2 then do;
      /* good but make sure no intervening leftside in-between*/
      "cursor =" row1 col1
      n1 = 0
      "find next nx  '"leftside"'"
      if rc \= 0 then iterate i  /* still good if no more are found*/
      /* remember the following leftside for the next iteration */
      "(nr1,nc1) = cursor"
      n1 = 10000 * nr1 + nc1 + 100000
      if n1 < c2 then do
         call note1N  /* invalid attempt to nest leftside(s) */
         /* restart the rightside search from the current leftside */
         row2 = row1; col2 = col1;
      end;
    end;
end
return

/* badamp -- flag ampersands that do not begin a recognised entity. */
/* For each ampersand the text up to the next semicolon on the same */
/* line is taken as the entity name.  The name is accepted when it  */
/* is in symbols (exact case), is one of a short list of names in   */
/* any case, or is a numeric reference: "#" plus 1-3 decimal digits */
/* or "#x" plus 1-2 hexadecimal digits.                             */
/* Lines containing the text cgi are not checked.                   */
/* At most one problem is reported per line; the scan then moves on */
/* to the following line.  Reported rows are appended to errline    */
/* and counted in amperr.                                           */
badamp:
row2 = 1; col2 = 0
amperr=0
"cursor =" row2 col2
/* & && &&& &LT &amp; &garb; */
/* & && &&& &RT &amp; &garb; */
/* &amp;  &lt; &gt; &quot; */
/* &AMP;  &LT; &GT; &QUOT; */
/* &amp;  &lt; &gt; &quot; &quote; */
/* & */
/* &&&& */
"(zlast) = linenum .zlast"
zlast=zlast+0
do i = 1 to 2000;   /* looking for ampersands (& = x'26') */
  "find next  x'26'"
  rcx = rc
  if rcx \= 0 then leave i;
  "(row2,col2) = cursor"
  "(orig) = line" row2
  line = substr(orig,col2);        /* text from the ampersand onward */
  "cursor =" row2 col2
  /* lines that mention cgi are left alone */
  si = pos('cgi',orig)
  if si \= 0 then iterate i
  si = pos(';',line)
  if si = 0 then do;
     /* no semicolon after the ampersand on this line */
     if amperr = 0 then call showvalid
     notex = "Check use of ampersand(s)|symbol at/after" row2 col2,
          "perhaps s/b &amp;"
     "line_after" row2  "= noteline (notex)"
      errline = errline||row2" "
     if row2 = zlast then leave i
     row2 = row2 + 1; col2 = 0
     "cursor =" row2 col2
     amperr=amperr+1
     iterate i
  end;
  /* entity name: everything between the ampersand and the semicolon */
  line = substr(line,2,si-2)
  /* symbols are exact capital or lowercase */
  j = wordpos(line,symbols)
  if j \= 0 then iterate i
  /* ... Non-breaking space, Soft-hyphen, Registered, Copyright*/
  j = wordpos(translate(line),,
      'AMP CCEDIL LT GT NBSP QUOT SHY REG COPY')
  if j \= 0 then iterate i
  /* numeric reference: the part after "#" must really be a number, */
  /* so that names such as "#" alone or "#zz" are no longer accepted */
  if left(line,1) == "#" then do
     refnum = substr(line,2)
     if refnum \== "" then do
        if length(refnum) <= 3 then
           if verify(refnum,'0123456789') = 0 then iterate i
        if translate(left(refnum,1)) == "X" then do
           hexnum = translate(substr(refnum,2))
           if hexnum \== "" then
              if length(hexnum) <= 2 then
                 if verify(hexnum,'0123456789ABCDEF') = 0 then iterate i
        end
     end
  end
  /* anything else is reported */
  if amperr = 0 then call showvalid
  notex = "Check use of ampersand(s)|symbol at/after" row2 col2,
        """&"line";"""
  "line_after" row2 "= noteline (notex)"
  errline = errline||row2" "
  amperr=amperr+1
  if row2 = zlast then leave i
  row2 = row2 + 1; col2 = 0
  "cursor =" row2 col2
end
return

/* misuse -- put a message line after every occurrence of a string. */
/*   parm1  string to search for (placed inside double quotes)      */
/*   parm2  message text to show after each occurrence              */
/* Each row where the string is found is appended to errline.       */
/* No more than 950 occurrences are marked.                         */
misuse:
 parse arg parm1, parm2
 row2 = 0
 'find first "'parm1'"'
 if rc = 0 then do
    "(row2,col2) = cursor"
    notex = parm2
    do i = 1 to 950
       "line_after .zcsr = msgline (notex)"
       "cursor =" row2 col2      /* fixup for spf/pc */
       errline = errline || row2 || " "
       'find next "'parm1'"'
       if rc \= 0 then leave
       "(row2,col2) = cursor"
    end
 end
 return

/* occurs -- report once how often a string occurs.                 */
/*   parm1  string to search for (placed inside double quotes)      */
/*   parm2  message text; the count is appended as ", x(n)"         */
/* The message line goes after the cursor line left by SEEK ALL and */
/* that row is appended to errline.  Nothing is done when the       */
/* string is not found.                                             */
occurs:
 row2 = 0
 parse arg parm1,parm2
 "seek all   """parm1""""
 if rc \= 0 then return
 "(SCNT,SLNS) = SEEK_COUNTS"
 "(row2,col2) = cursor"
 /* show the count as a plain number, as the main line does with  */
 /* seekcnt, rather than in the form the editor returns it        */
 scnt = scnt + 0
 notex = parm2||", x("||scnt||")"
 "line_after .zcsr = msgline (notex)"
 errline = errline||row2" "
return

/* note1 -- report that leftside at row1 col1 is not properly       */
/*   terminated.  When ignorex is active (not "ZZZ") the search     */
/*   position row2/col2 is moved back to row1/col1, and if the text */
/*   at that position starts with ignorex only an informational     */
/*   note is written and the row is not added to errline.           */
note1:
   notex = leftside "at" row1 col1 " not properly terminated"
   /* one uppercase copy of the line serves the comparison below */
   "(line) = line" row1
   line = translate(line)
   if ignorex <> "ZZZ" then do
      row2 = row1; col2 = col1
      if ignorex = substr(line,col1,length(ignorex)) then do
         notex = "OK --" ignorex "at" row1 col1 "stands by itself"
         "line_after" row1 "= noteline (notex)"
         return
      end
   end
   "line_after" row1 "= msgline (notex)"
   errline = errline||row1" "
   return
/* note1n -- report a leftside at row1 col1 that meets another      */
/*   leftside before its rightside.  If ignorex is active (not      */
/*   "ZZZ") and the text at that position starts with ignorex, an   */
/*   informational note is written instead of the message.          */
/*   The row is appended to errline in both cases.                  */
note1n:
   standalone = 0
   if ignorex <> "ZZZ" then do
      "(line) = line" row1
      line = translate(line)
      standalone = (ignorex = substr(line, col1, length(ignorex)))
   end
   if standalone then do
      notex = "OK --" ignorex "at" row1 col1 "stands by itself"
      "line_after" row1 "= noteline (notex)"
   end
   else do
      notex = leftside "at" row1 col1 " Not terminated" "before neXt" leftside
      "line_after" row1 "= msgline (notex)"
   end
   errline = errline || row1 || " "
   return
/* note2 -- message for a rightside at row2 col2 that was found     */
/*   ahead of the leftside being checked; the row goes into errline */
note2:
   notex = "extraneous termination " || rightside || " at " || row2 || " " || col2
   errline = errline || row2 || " "
   "line_after" row2 "= msgline (notex)"
   return
/* note2n -- message for a rightside at row2 col2 found when no     */
/*   further leftside exists; the row goes into errline             */
note2n:
   notex = "extraneous termination " || rightside || " at " || row2 || " " || col2
   notex = notex || " no more " || leftside || " thru EOF"
   errline = errline || row2 || " "
   "line_after" row2 "= msgline (notex)"
   return
/* showvalid -- place a column ruler before row2 and, as long as no */
/*   ampersand problem has been counted in amperr yet, two notes    */
/*   after row2 showing what valid symbols look like.               */
showvalid:
   notex = "1...5...10...15...20...25...30...35...40...45...50...55" ||,
           "...60...65...70...75...80...85...90..."
   "line_before" row2 "= noteline (notex)"
   if amperr = 0 then do
      errline = errline || row2 || " "
      notex = "            &Aacute; ... &yuml; &#09; ... &#255;"
      "line_after" row2 "= noteline (notex)"
      notex = "Valid symbols look like-- &amp; &gt; &lt; &quot;"
      "line_after" row2 "= noteline (notex)"
   end
   return
   /* &&  &#147;quoted material &&#148; */