Article delegate-en/381 of [1-5169] on the server localhost:119
  upper oldest olders older1 this newer1 newers latest
search
[Top/Up] [oldest] - [Older+chunk] - [Newer+chunk] - [newest + Check]
[Reference:<_A380@delegate-en.ML_>]
Newsgroups: mail-lists.delegate-en

[DeleGate-En] Re: Improperly rewritten URLs
29 Mar 1999 09:58:18 GMT ysato@etl.go.jp (Yutaka Sato)


On 03/29/99(01:05) you pjuaqbdyi-t7hpjipeobxr.ml@ml.delegate.org wrote
in <_A380@delegate-en.ML_>
 |I've been using DeleGate as a delegate-type proxy
 |(ie https://delegate.machine.edu/-_-http://.....)
 |and I think that I've found some URLs that aren't being
 |rewritten properly.
 |
 |At the end of this email I've attached some html that
 |went through delegate.  Note that urls are being
 |rewritten properly except for  "http://1-800.nytimes.com..",
 |which gets through unaltered.  
 |
 |Maybe a machine name like 1-800 is technically illegal; I'm really not
 |sure.

Such a hostname is not preferable, because it makes distinguishing a
hostname from dotted IP-address notation more complex.
But in fact such a hostname is legal, so I tried to support it with
the enclosed patch.
# I introduced the "SLL_OTHERWISE" pseudo symbol for the "ALTernative"
# rule of the simple LL parser, to detect invalid domain labels
# that end with "-", like "xxx-.yyy.zzz".  This makes the
# patch slightly larger.

RFC1738(Uniform Resource Locators (URL)):
> ; URL schemeparts for ip based protocols:
>
> ip-schemepart  = "//" login [ "/" urlpath ]
>
> login          = [ user [ ":" password ] "@" ] hostport
> hostport       = host [ ":" port ]
> host           = hostname | hostnumber
> hostname       = *[ domainlabel "." ] toplabel
> domainlabel    = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
> toplabel       = alpha | alpha *[ alphadigit | "-" ] alphadigit
> alphadigit     = alpha | digit
> hostnumber     = digits "." digits "." digits "." digits

(This RFC says that a non-top-level domain label can begin with a non-alphabetic character, i.e. a digit)

RFC1034(Domain Concepts and Facilities):
> 3.5. Preferred name syntax
>  The DNS specifications attempt to be as general as possible in the rules
>  for constructing domain names.  The idea is that the name of any
> existing object can be expressed as a domain name with minimal changes.
> However, when assigning a domain name for an object, the prudent user
> will select a name which satisfies both the rules of the domain system
> and any existing rules for the object, whether these rules are published
> or implied by existing programs.
> For example, when naming a mail domain, the user should satisfy both the
> rules of this memo and those in RFC-822.  When creating a new host name,
> the old rules for HOSTS.TXT should be followed.  This avoids problems
> when old software is converted to use domain names.
> The following syntax will result in fewer problems with many
> applications that use domain names (e.g., mail, TELNET).
>
> <domain> ::= <subdomain> | " "
> <subdomain> ::= <label> | <subdomain> "." <label>
> <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
> <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
> <let-dig-hyp> ::= <let-dig> | "-"
> <let-dig> ::= <letter> | <digit>
> <letter> ::= any one of the 52 alphabetic characters A through Z in
>              upper case and a through z in lower case
> <digit> ::= any one of the ten digits 0 through 9

(This RFC says that every component of a hostname should preferably begin with a letter)

Cheers,
Yutaka
--
Yutaka Sato <ysato@etl.go.jp> http://www.etl.go.jp/~ysato/   @ @ 
Computer Science Division, Electrotechnical Laboratory      ( - )
1-1-4 Umezono, Tsukuba, Ibaraki, 305-8568 Japan            _<   >_


diff -c -r ../../delegate5.9.1/src/url.c ./src/url.c
*** ../../delegate5.9.1/src/url.c	Thu Mar 11 16:00:06 1999
--- ./src/url.c	Mon Mar 29 18:37:42 1999
***************
*** 1406,1411 ****
--- 1406,1419 ----
  ABCDEFGHIJKLMNOPQRSTUVWXYZ\
  ";
  
+ static char ALPHADIGIT[] = "\
+ abcdefghijklmnopqrstuvwxyz\
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ\
+ 0123456789\
+ ";
+ extern char SLL_OTHERWISE[];
+ #define OTHERWISE SLL_OTHERWISE
+ 
  /*
  static char NALPHA[] = "\
  abcdefghijklmnopqrstuvwxyz\
***************
*** 1489,1497 ****
  ISRULE( NALPHAS);
  ISRULE( XALPHAS);
  ISRULE( YALPHAS);
  
  SEQ(HOSTNAME)
! 	{ "name",	IMM,		IALPHA		},
  	{ "name",	".",		HOSTNAME,	OPTIONAL},
  END
  SEQ(HOSTNUMBER)
--- 1497,1506 ----
  ISRULE( NALPHAS);
  ISRULE( XALPHAS);
  ISRULE( YALPHAS);
+ ISRULE( DOMLABEL);
  
  SEQ(HOSTNAME)
! 	{ "name",	IMM,		DOMLABEL	},
  	{ "name",	".",		HOSTNAME,	OPTIONAL},
  END
  SEQ(HOSTNUMBER)
***************
*** 1501,1508 ****
  	{ "num4",	".",		DIGITS		},
  END
  ALT(HOST)
- 	{ "name",	IMM,		HOSTNAME	},
  	{ "number",	IMM,		HOSTNUMBER	},
  END
  SEQ(PORT)
  	{ "number",	IMM,		DIGITS		},
--- 1510,1517 ----
  	{ "num4",	".",		DIGITS		},
  END
  ALT(HOST)
  	{ "number",	IMM,		HOSTNUMBER	},
+ 	{ "name",	IMM,		HOSTNAME	},
  END
  SEQ(PORT)
  	{ "number",	IMM,		DIGITS		},
***************
*** 1510,1515 ****
--- 1519,1533 ----
  SEQ(HOSTPORT)
  	{ "host",	IMM,		HOST,		PUTVAL},
  	{ "port",	":",		PORT,		OPTIONAL|PUTVAL},
+ END
+ 
+ ALT(DOMLABEL2)
+ 	{ "alphadigit",	ALPHADIGIT,	DOMLABEL2,	CHARSET},
+ 	{ "hyphen",	"-",		DOMLABEL	},
+ 	{ "terminate",	OTHERWISE,	SUCCESS		},
+ END
+ SEQ(DOMLABEL)
+ 	{ "alphadigit",	ALPHADIGIT,	DOMLABEL2,	CHARSET},
  END
  
  SEQ(IALPHA)
diff -c -r ../../delegate5.9.1/rary/SLL.c ./rary/SLL.c
*** ../../delegate5.9.1/rary/SLL.c	Wed May 27 21:43:01 1998
--- ./rary/SLL.c	Mon Mar 29 18:37:54 1999
***************
*** 22,27 ****
--- 22,29 ----
  #include <string.h>
  #include "SLL.h"
  #define DEBUG 0
+ char SLL_OTHERWISE[] = {0};
+ #define OTHERWISE SLL_OTHERWISE
  
  int SLLparse(lev, prp, srca, nsrcp, putv, vala, size, nvalp)
  	SLLRule *prp;
***************
*** 44,49 ****
--- 46,52 ----
  	char *val;
  	int error;
  	int slen,rsize;
+ 	int nmatch;
  
  	src = srca;
  	val = vala;
***************
*** 55,60 ****
--- 58,64 ----
  if( DEBUG )
  printf("%2d [%s] %-10s: %s\n", lev, type, name, src);
  
+ 	nmatch = 0;
  	for( si = 1; prp[si].r_name; si++ ){
  		crp = &prp[si];
  
***************
*** 69,78 ****
--- 73,89 ----
  		}else{
  			gate = crp->r_gate;
  			glen = strlen(gate);
+ 			if( gate == OTHERWISE ){
+ 				if( (flag & ISALT) && 0 < nmatch )
+ 					match = 0;
+ 				else	match = 1;
+ 			}else
  			if( crp->r_flag & IGNCASE )
  				match = strncasecmp(src,gate,glen) == 0;
  			else	match = strncmp(src,gate,glen) == 0;
  		}
+ 		if( 0 < glen && match )
+ 			nmatch++;
  
  		if( match ){
  			if( putv && (crp->r_flag & PUTGATE) ){
***************
*** 113,118 ****
--- 124,130 ----
  					error = 1;
  					goto failure;
  				}
+ 				src -= glen;
  			}
  		}else{
  			if( flag & ISSEQ )

  admin search upper oldest olders older1 this newer1 newers latest
[Top/Up] [oldest] - [Older+chunk] - [Newer+chunk] - [newest + Check]
@_@V