网络时代URL无处不在,
太常见了以至于这东西的好多细节都似是而非,
虽然大多数时间内都不会有问题,
但是不注意还是会造成一些谬误,
在写程序的时候往往造成一些不必要的错误,
在此我将这规范的语法(BNF)放在这里,以便于正确处理它。
引用自
http://man.chinaunix.net/develop/rfc/RFC1738.txt 的第五部分。
;URL的一般形式如下:
genericurl = scheme ":" schemepart
;特定的预定义方案在这里进行定义;新方案可以在IANA那儿注册
url = httpurl | ftpurl | newsurl |
nntpurl | telneturl | gopherurl |
waisurl | mailtourl | fileurl |
prosperourl | otherurl
;新方案遵从一般语法
otherurl = genericurl
;方案都是小写的;解释程序应该忽略大小写
scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
schemepart = *xchar | ip-schemepart
;基于协议的ip的URL方案部分:
ip-schemepart = "//" login [ "/" urlpath ]
login = [ user [ ":" password ] "@" ] hostport
hostport = host [ ":" port ]
host = hostname | hostnumber
hostname = *[ domainlabel "." ] toplabel
domainlabel = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
toplabel = alpha | alpha *[ alphadigit | "-" ] alphadigit
alphadigit = alpha | digit
hostnumber = digits "." digits "." digits "." digits
port = digits
user = *[ uchar | ";" | "?" | "&" | "=" ]
password = *[ uchar | ";" | "?" | "&" | "=" ]
urlpath = *xchar ;建立在3.1节的协议基础上
;预定义方案:
;FTP(文件传输协议,请参考RFC959)
ftpurl = "ftp://" login [ "/" fpath [ ";type=" ftptype ]]
fpath = fsegment *[ "/" fsegment ]
fsegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
ftptype = "A" | "I" | "D" | "a" | "i" | "d"
;FILE(文件)
fileurl = "file://" [ host | "localhost" ] "/" fpath
;HTTP(超文本传输协议)
httpurl = "http://" hostport [ "/" hpath [ "?" search ]]
hpath = hsegment *[ "/" hsegment ]
hsegment = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
search = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
;GOPHER(请参考RFC1436)
gopherurl = "gopher://" hostport [ / [ gtype [ selector
[ "%09" search [ "%09" gopher+_string ] ] ] ] ]
gtype = xchar
selector = *xchar
gopher+_string = *xchar
;MAILTO(请参考RFC822)
mailtourl = "mailto:" encoded822addr
encoded822addr = 1*xchar ;在RFC822中进一步定义了
;NEWS(新闻,请参考RFC1036)
newsurl = "news:" grouppart
grouppart = "*" | group | article
group = alpha *[ alpha | digit | "-" | "." | "+" | "_" ]
article = 1*[ uchar | ";" | "/" | "?" | ":" | "&" | "=" ] "@" host
;NNTP(网络新闻传输协议,请参考RFC977)
nntpurl = "nntp://" hostport "/" group [ "/" digits ]
;TELNET(远程登录协议)
telneturl = "telnet://" login [ "/" ]
;WAIS(广域信息服务系统,请参考RFC1625)
waisurl = waisdatabase | waisindex | waisdoc
waisdatabase = "wais://" hostport "/" database
waisindex = "wais://" hostport "/" database "?" search
waisdoc = "wais://" hostport "/" database "/" wtype "/" wpath
database = *uchar
wtype = *uchar
wpath = *uchar
;PROSPERO
prosperourl = "prospero://" hostport "/" ppath *[ fieldspec ]
ppath = psegment *[ "/" psegment ]
psegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
fieldspec = ";" fieldname "=" fieldvalue
fieldname = *[ uchar | "?" | ":" | "@" | "&" ]
fieldvalue = *[ uchar | "?" | ":" | "@" | "&" ]
;杂七杂八的定义
lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
"i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
"q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
"y" | "z"
hialpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
"J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
"S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
alpha = lowalpha | hialpha
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
"8" | "9"
safe = "$" | "-" | "_" | "." | "+"
extra = "!" | "*" | "'" | "(" | ")" | ","
national = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
punctuation = "<" | ">" | "#" | "%" | <">
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="
hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
"a" | "b" | "c" | "d" | "e" | "f"
escape = "%" hex hex
unreserved = alpha | digit | safe | extra
uchar = unreserved | escape
xchar = unreserved | reserved | escape
digits = 1*digit