Published in: Regular Expression
(Supported by JavaScript, maybe other languages)
// replace() can be used to parse the URI. For example, to get the path:
//   path = uri.replace(regexUri, "$5$6");

//****************************************************//
//***************** Validate a URI *******************//
//****************************************************//
//- The different parts are kept in their own groups and can be recombined
//  depending on the scheme:
//  - http as $1://$3:$4$5?$7#$8
//  - ftp as $1://$2@$3:$4$5
//  - mailto as $1:$6?$7
//- groups are as follows:
//  1   == scheme
//  2   == userinfo
//  3   == host
//  4   == port
//  5,6 == path (5 if it has an authority, 6 if it doesn't)
//  7   == query
//  8   == fragment
//- Every "/" in the pattern is written as "\/": an unescaped "/" outside a
//  character class would end the regex literal early and make this
//  declaration a syntax error.
var regexUri = /^([a-z0-9+.-]+):(?:\/\/(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(?::(\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;
/*composed as follows:
	^
	([a-z0-9+.-]+):							#scheme
	(?:
		\/\/							#it has an authority:
		(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?	#userinfo
		((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)		#host
		(?::(\d*))?						#port
		(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?	#path
		|
									#it doesn't have an authority:
		(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?	#path
	)
	(?:
		\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)	#query string
	)?
	(?:
		#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)	#fragment
	)?
	$
*/

//****************************************************//
//** Validate a URI (includes delimiters in groups) **//
//****************************************************//
//- The different parts--along with their delimiters--are kept in their own
//  groups and can be recombined as $1$6$2$3$4$5$7$8$9
//- groups are as follows:
//  1,6 == scheme:// or scheme:
//  2   == userinfo@
//  3   == host
//  4   == :port
//  5,7 == path (5 if it has an authority, 7 if it doesn't)
//  8   == ?query
//  9   == #fragment
var regexUriDelim = /^(?:([a-z0-9+.-]+:\/\/)((?:(?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(:(?:\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|([a-z0-9+.-]+:)(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(\?(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?$/i;

//****************************************************//
//***************** Validate a URL *******************//
//****************************************************//
//Validates a URI with an http or https scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1://$2:$3$4?$5#$6
//- groups are as follows:
//  1 == scheme (http or https)
//  2 == host (at least three characters)
//  3 == port (at least one digit when present)
//  4 == path
//  5 == query
//  6 == fragment
//- Does not validate the host portion (domain); just makes sure the string
//  consists of valid characters (does not include IPv6 nor IPvFuture
//  addresses as valid).
var regexUrl = /^(https?):\/\/((?:[a-z0-9.-]|%[0-9A-F]{2}){3,})(?::(\d+))?((?:\/(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})*)*)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;

//****************************************************//
//**************** Validate a Mailto *****************//
//****************************************************//
//Validates a URI with a mailto scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1:$2?$3
//- Does not validate the email addresses themselves.
// Matches a URI with a mailto scheme (case-insensitive).
// Capture groups, recombinable as $1:$2?$3:
//   1 == scheme ("mailto")
//   2 == the part after "mailto:" (optional; not validated as email addresses)
//   3 == query string, without the leading "?"
// The expression is assembled from named pieces so the shared character sets
// are spelled out once.
var regexMailto = (function () {
  // A percent-encoded octet such as %20.
  var pctEncoded = "%[0-9A-F]{2}";
  // Characters accepted literally in both the address part and the query.
  var sharedChars = "a-z0-9-._~!$&'()*+,;=";
  // One character of the address part: shared set plus ":" and "@".
  var addressChar = "(?:[" + sharedChars + ":@]|" + pctEncoded + ")";
  // One character of the query: shared set plus ":", "/", "?" and "@".
  var queryChar = "(?:[" + sharedChars + ":\\/?@]|" + pctEncoded + ")";
  var pattern = "^(mailto):(" + addressChar + "+)?(?:\\?(" + queryChar + "*))?$";
  return new RegExp(pattern, "i");
})();
You need to log in to post a comment.
