Article delegate-en/2301 of [1-5169] on the server localhost:119
  upper oldest olders older1 this newer1 newers latest
search
[Top/Up] [oldest] - [Older+chunk] - [Newer+chunk] - [newest + Check]
[Reference:<_A2295@delegate-en.ML_>]
Newsgroups: mail-lists.delegate-en

[DeleGate-En] Re: HTTP mediation
21 Jun 2003 14:38:02 GMT feedback@delegate.org (Yutaka Sato)


On 06/19/03(21:42) you "Ng S. T. Chong" <p2ieabdyi-5ilfsunnr5fr.ml@ml.delegate.org> wrote
in <_A2295@delegate-en.ML_>
 |Thanks Sato-san.  The fix supports a larger number of sites.
 |
 |As you pointed out dynamically generated URLs are at the heart of the
 |problem.   Unfortunately, increasingly many sites are moving towards this
 |direction. Take for instance the popular Google search engine.  

I tried to relay "http://www.google.com/" as "http://DeleGate/www.google.com/"
with MOUNT="/* http://*".  Problems found are as follows.

1) The top page calculates a URL in JavaScript like
  "function c(p,l,e){ ... f.action = 'http://'+p; ... }".
Although such an incomplete URL does not make sense in a usual context like
<A HREF=http://>, MOUNT="/* http://*" should still rewrite it, so
I made a modification to enable it.

2) The top page contains references by relative path.
To cope with such a case, "http://DeleGate/server" should be redirected to
"http://DeleGate/server/" automatically, but the redirection was supported only
in the default MOUNT with "-_-" notation like "http://DeleGate/-_-http://server".
Since it should be supported in general MOUNT too, I made it so.

3) The site uses HTML tags which contain multiple URLs in a single tag.
Since this should be handled, I added support for it.

4) The top page contains a URL fragment identifier like "url(#xxx)" which
causes trouble when it is prefixed with the base URL, so I made DeleGate not
try to rewrite fragment-only URLs.

5) The top page contains JavaScript in which the quote character is escaped like
'document.write("... <a href=\"...\" ...")'.  Since such escaping seems usual,
I coped with it.

The enclosed patch includes the above modifications.

6) The top page contains a URL which must be kept in absolute form like
"if (!hp.isHomePage('http://www.google.co.jp/')) { ..."
DeleGate rewrites it to
"if (!hp.isHomePage('/www.google.co.jp/')) { ..."
which breaks the functionality.  It should be like
"if (!hp.isHomePage('http://DeleGate/www.google.co.jp/')) { ..."
This is possible by disabling the partializing of URLs with the URICONV="mount:+" parameter.

Cheers,
Yutaka
--
  @ @ Yutaka Sato <y.sato@delegate.org> http://www.delegate.org/y.sato/
 ( - ) National Institute of Advanced Industrial Science and Technology (AIST)
_<   >_ 1-1-4 Umezono, Tsukuba, Ibaraki, 305-8568 Japan
Do the more with the less -- B. Fuller

diff -cr ../dist/delegate8.6.0-snap03061804/src/delegate.h ./delegate.h
*** ../dist/delegate8.6.0-snap03061804/src/delegate.h	Sun Jun 15 04:59:26 2003
--- ./delegate.h	Sat Jun 21 15:01:25 2003
***************
*** 140,145 ****
--- 140,146 ----
  	Urlx	r_my;	/* real URL of me (DeleGate) */
  	Urlx	r_vb;	/* virtual base URL of me for client */
  	char   *r_ctype; /* type of base data {html, css, header} */
+ 	char    r_curtag[32]; /* in the tag now */
  	int	r_inScript; /* now scanning <SCRIPT> */
  	int	r_inStyle; /* now in <STILE> */
  	char    r_quote; /* the quote char. of the current quote string */
diff -cr ../dist/delegate8.6.0-snap03061804/src/http.c ./http.c
*** ../dist/delegate8.6.0-snap03061804/src/http.c	Wed Jun 18 04:15:56 2003
--- ./http.c	Sat Jun 21 15:04:40 2003
***************
*** 465,470 ****
--- 465,471 ----
  	referer->r_ctype = 0;
  	referer->r_inScript = 0;
  	referer->r_inStyle = 0;
+ 	referer->r_curtag[0] = 0;
  
  	rp = refbuf;
  
diff -cr ../dist/delegate8.6.0-snap03061804/src/mount.c ./mount.c
*** ../dist/delegate8.6.0-snap03061804/src/mount.c	Mon Jun 16 00:35:00 2003
--- ./mount.c	Sat Jun 21 21:31:38 2003
***************
*** 947,952 ****
--- 947,953 ----
  		if( mt->Dst.u_remain && strtailchr(mt->Dst.u_src)=='/' )
  		{
  			mt->u_dirmatch = 1;
+ 			mt->u_moved = 302;
  		}
  
  		if( login[0] )
***************
*** 1441,1446 ****
--- 1442,1458 ----
  		 * input URL matched with the URL pattern of this MOUNT point
  		 */
  
+ 		if( qtype & U_MOVED_TO ){
+ 			if( (match & DIRMATCH)
+ 			 || mt->Dst.u_path2site && strchr(iurl+len,'/')==0
+ 			){
+ 				sprintf(url,"%s://%s%s/",CTX_clif_proto(ctx),
+ 					myhostport,iurl);
+ 				sv1log("MOUNT DIRMATCH gen MovedTo: %s\n",url);
+ 				goto EXIT;
+ 			}
+ 		}
+ 
  		if( (qtype & U_MOVED_TO ) && (mt->u_flags & U_MOVED_TO ) == 0
  		 || (qtype & U_USE_PROXY) && (mt->u_flags & U_USE_PROXY) == 0
  		){
***************
*** 1597,1602 ****
--- 1609,1615 ----
  		if( mt->u_flags & (U_MOVED_TO|U_USE_PROXY) )
  			return 0;
  	}
+ EXIT:
  	last_forw = ai;
  	if( rmt ) *rmt = mt;
  	return mt->u_opts;
***************
*** 1802,1809 ****
--- 1815,1824 ----
  printf("#### MOUNT path2site: [%s] DG[%s]\n",hostport,delegate);
  */
  }else{
+ 			if( hostport[0] ){
  			sprintf(up,"%s/",hostport);
  			up += strlen(up);
+ 			}
  }
  		}
  
diff -cr ../dist/delegate8.6.0-snap03061804/src/url.c ./url.c
*** ../dist/delegate8.6.0-snap03061804/src/url.c	Mon Jun 16 00:32:52 2003
--- ./url.c	Sat Jun 21 22:30:20 2003
***************
*** 252,257 ****
--- 252,260 ----
  			ctype = "";
  	}
  	if( Base ){
+ 		if( Base->r_curtag[0] ){
+ 			tag = Base->r_curtag;
+ 		}
  		Base->r_quote = 0;
  		inScript = Base->r_inScript;
  		inStyle = Base->r_inStyle;
***************
*** 403,408 ****
--- 406,414 ----
  					}
  					tag = NULL;
  					isendtag = 0;
+ 					if( Base ){
+ 						Base->r_curtag[0] = 0;
+ 					}
  				}
  
  if( qconvmask & (URICONV_ANY|TAGCONV_META) )
***************
*** 581,590 ****
--- 587,598 ----
  			up = p+4;
  			if( *up == '\'' || *up == '"' )
  				up++;
+ 			if( *up != '#' ){
  			top = up;
  			ref = up;
  			attr = up;
  			goto exit;
+ 			}
  		}
  	}
  }
***************
*** 630,635 ****
--- 638,646 ----
  		while( isspace(*p) )
  			p++;
  
+ 		if( inScript && *p == '\\' && (p[1]=='\'' || p[1]=='"') )
+ 			p++; /* escaped quote in script */
+ 
  		if( *p == '"' || *p == '\'' )
  		{
  			quotech = *p;
***************
*** 674,679 ****
--- 685,693 ----
  	if( attrp != NULL )
  		*attrp = attr;
  	if( Base != NULL ){
+ 		if( tag ){
+ 			wordScan(tag,Base->r_curtag);
+ 		}
  		Base->r_inScript = inScript;
  		Base->r_inStyle = inStyle;
  		Base->r_quote = quotech;
***************
*** 2151,2156 ****
--- 2165,2173 ----
  		return 0;
  	hostport = getv(av,"hostport");
  	if( hostport == NULL ) hostport = getv(av,"host");
+ 	if( hostport == NULL /* && inScript */ ){
+ 		hostport = "";
+ 	}
  	path = getv(av,"path");
  	search = getv(av,"search");
  
***************
*** 2589,2595 ****
--- 2606,2615 ----
  
  SEQ(HTTP)
  	{ "://",	"://",		NEXT		},
+ /*
  	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
+ */
+ 	{ "hostport",	IMM,		HOSTPORT,	OPTIONAL|PUTVAL},
  	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL},
  	{ "search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
  END

  admin search upper oldest olders older1 this newer1 newers latest
[Top/Up] [oldest] - [Older+chunk] - [Newer+chunk] - [newest + Check]
@_@V