url
Linux C语言实现urlencode和urldecode
urlencode编码的基本规则
- 字符 "a"-"z"、"A"-"Z"、"0"-"9"、"."、"-"、"*" 和 "_" 都不被编码，维持原值；
- 空格 " " 被转换为加号 "+"。
- 其他每个字节都被表示成 "%XY" 格式的由 3 个字符组成的字符串，其中 XY 是该字节的两位十六进制值；非 ASCII 字符先按 UTF-8 编码成字节，再逐字节转换（特别需要注意：这里使用大写形式的十六进制字符）。
urlencode编码
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
/* Upper-case hexadecimal digits used for the "%XY" escapes. */
static unsigned char hexchars[] = "0123456789ABCDEF";

/**
 * @brief URLEncode : percent-encode a byte buffer (form-urlencoded style)
 *
 * 'A'-'Z', 'a'-'z', '0'-'9', '.', '-', '*' and '_' are copied unchanged,
 * a space becomes '+', and every other byte becomes "%XY" with upper-case
 * hexadecimal digits.
 *
 * @param str: the bytes to encode (does not need to be NUL terminated)
 * @param strsz: number of bytes in str
 * @param result: the output buffer; no terminating \0 is written
 * @param resultsz: capacity of result in bytes
 *
 * @return: >=0 the number of bytes written to result
 *          -1  strsz or resultsz is negative
 *          -2  result is too small to hold the complete encoding
 *
 * Note:
 * 1) in the worst case every input byte expands to three output bytes, so
 *    resultsz = 3 * strsz is always sufficient
 * 2) a non-empty input together with resultsz == 0 is reported as -2; it is
 *    never reported as a successful zero-length encoding
 */
int URLEncode(const char *str, const int strsz, char *result, const int resultsz)
{
    int i, j;

    if (strsz < 0 || resultsz < 0)
        return -1;

    for (i = 0, j = 0; i < strsz; i++)
    {
        /* Work on the unsigned value so bytes >= 0x80 index hexchars correctly. */
        const unsigned char ch = (unsigned char)str[i];
        const int is_plain = (ch >= 'A' && ch <= 'Z') ||
                             (ch >= 'a' && ch <= 'z') ||
                             (ch >= '0' && ch <= '9') ||
                             ch == '.' || ch == '-' || ch == '*' || ch == '_';

        if (is_plain || ch == ' ')
        {
            if (j >= resultsz)
                return -2;
            result[j++] = (ch == ' ') ? '+' : (char)ch;
        }
        else
        {
            /* Written as a subtraction so that j + 3 can never overflow. */
            if (resultsz - j < 3)
                return -2;
            result[j++] = '%';
            result[j++] = hexchars[ch >> 4];
            result[j++] = hexchars[ch & 0xF];
        }
    }

    /* Reaching this point means every input byte has been encoded. */
    return j;
}
/*
 * Command-line driver: URL-encodes the file named by argv[1] and writes the
 * encoded bytes to stdout.
 *
 * Returns 0 once the whole file has been processed, -1 on wrong usage, -2 if
 * the file cannot be opened, and otherwise the negative value that stopped
 * the loop (from read() or URLEncode()).
 */
int main(int argc, char *argv[])
{
    char inbuf[1024];
    char outbuf[1024 * 3];   /* worst case: every input byte becomes "%XY" */
    int status = 0;
    int fd;

    if (argc != 2)
    {
        printf("please input the encoding filename\n");
        return -1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd == -1)
    {
        printf("open file %s failure\n", argv[1]);
        return -2;
    }

    for (;;)
    {
        int k;

        status = read(fd, inbuf, 1024);
        if (status <= 0)
            break;              /* 0: end of file, <0: read error */

        status = URLEncode(inbuf, status, outbuf, 1024 * 3);
        if (status < 0)
            break;

        for (k = 0; k < status; k++)
            putchar(outbuf[k]);
    }

    if (status < 0)
    {
        printf("encode data failure\n");
    }

    close(fd);
    return status;
}
urldecode解码
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
static unsigned char hexchars[] = "0123456789ABCDEF";

/* Returns the value 0..15 of one hexadecimal digit, or -1 if c is not one. */
static int url_hex_value(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;
    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;
    return -1;
}

/**
 * @brief URLDecode : decode a urlencoded buffer back into raw bytes
 *
 * '+' becomes a space, "%XY" becomes the byte with hexadecimal value XY
 * (either letter case), and 'A'-'Z', 'a'-'z', '0'-'9', '.', '-', '*', '_'
 * are copied unchanged. Any other input byte is treated as an error.
 *
 * @param str: the urlencoded input (does not need to be NUL terminated)
 * @param strsz: number of bytes in str
 * @param result: the output buffer; no terminating \0 is written
 * @param resultsz: capacity of result in bytes
 * @param last_pos: receives the position of the first input byte that was
 *                  not consumed; it stays at str on every error return
 *
 * @return: >=0 the number of bytes written to result
 *          -1  strsz or resultsz is negative
 *          -2  the input contains an invalid byte or a malformed escape
 *
 * Note:
 * 1) the decoded data is never longer than the input, so resultsz = strsz
 *    is always sufficient
 * 2) decoding stops early, without an error, when result is full or when the
 *    input ends inside a "%XY" escape; the caller can resume from *last_pos
 */
int URLDecode(const char *str, const int strsz, char *result, const int resultsz, const char **last_pos)
{
    int in = 0;
    int out = 0;

    *last_pos = str;
    if (strsz < 0 || resultsz < 0)
        return -1;

    while (in < strsz && out < resultsz)
    {
        const char c = str[in];

        if (c == '+')
        {
            result[out++] = ' ';
            in += 1;
            continue;
        }

        if (c == '%')
        {
            int hi, lo;

            /* Escape is cut off by the end of the input: leave it unconsumed. */
            if (in + 3 > strsz)
                break;

            hi = url_hex_value(str[in + 1]);
            if (hi < 0)
                return -2;
            lo = url_hex_value(str[in + 2]);
            if (lo < 0)
                return -2;

            result[out++] = (char)((hi << 4) | lo);
            in += 3;
            continue;
        }

        if ((c >= 'A' && c <= 'Z') ||
            (c >= 'a' && c <= 'z') ||
            (c >= '0' && c <= '9') ||
            c == '.' || c == '-' || c == '*' || c == '_')
        {
            result[out++] = c;
            in += 1;
            continue;
        }

        return -2;
    }

    *last_pos = str + in;
    return out;
}
/*
 * Command-line driver: URL-decodes the file named by argv[1] and writes the
 * decoded bytes to stdout.
 *
 * The file is read in chunks. If a chunk ends with input that URLDecode did
 * not consume (for example a "%XY" escape split across two reads), those
 * bytes are moved to the front of buf and the next read appends after them.
 *
 * Returns 0 on success, -1 on wrong usage, -2 if the file cannot be opened,
 * -3 if the file ends with unconsumed input, -4 if URLDecode reports an
 * error, or the negative value returned by read().
 */
int main(int argc, char *argv[])
{
    int fd = -1;
    char buf[4096], result[4096];
    char *start_pos;        /* where the next read() stores its data */
    const char *last_pos;   /* first input byte URLDecode did not consume */
    int ret, sz;
    int i = 0;

    if(argc != 2)
    {
        printf("please input the encoding filename\n");
        return -1;
    }
    if((fd = open(argv[1], O_RDONLY)) == -1)
    {
        printf("open file %s failure\n", argv[1]);
        return -2;
    }

    start_pos = buf;
    last_pos = NULL;
    while((ret = read(fd, start_pos, buf + 4096 - start_pos)) >= 0)
    {
        if(ret == 0)
        {
            if(start_pos == buf)
                break;
            else
            {
                /* End of file while unconsumed input is still pending. */
                ret = -3;
                break;
            }
        }

        sz = URLDecode(buf, start_pos - buf + ret, result, 4096, &last_pos);
        if(sz < 0)
        {
            ret = -4;
            break;
        }

        if(last_pos != start_pos + ret)
        {
            size_t leftover = (size_t)(start_pos + ret - last_pos);

            /* Source and destination both lie inside buf and may overlap
             * (e.g. when the whole input so far is "%4"), so memmove is
             * required here; memcpy would be undefined behaviour. */
            memmove(buf, last_pos, leftover);
            start_pos = buf + leftover;
        }
        else
        {
            start_pos = buf;
        }

        for(i = 0; i < sz; i++)
            printf("%c", result[i]);
    }

    if(ret < 0)
    {
        printf("decode data failure\n");
    }
    close(fd);
    return ret;
}
// Maps a nibble value to its upper-case hexadecimal digit: 0-9 -> '0'-'9',
// 10-15 -> 'A'-'F'. Larger values are not range-checked; they simply continue
// past 'F', wrapping modulo 256.
unsigned char ToHex(unsigned char x)
{
    if (x > 9)
    {
        return static_cast<unsigned char>(x - 10 + 'A');
    }
    return static_cast<unsigned char>(x + '0');
}
// Converts one hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f') to its value 0-15.
//
// Any other character is a caller error and trips the assertion. Letters
// beyond 'F'/'f' are rejected as well; they are not hexadecimal digits and
// would yield values above 15. When assertions are compiled out (NDEBUG),
// 0 is returned instead of an indeterminate value.
unsigned char FromHex(unsigned char x)
{
    if (x >= '0' && x <= '9') return static_cast<unsigned char>(x - '0');
    if (x >= 'A' && x <= 'F') return static_cast<unsigned char>(x - 'A' + 10);
    if (x >= 'a' && x <= 'f') return static_cast<unsigned char>(x - 'a' + 10);
    assert(0);
    return 0;
}
// Percent-encodes str and returns the result.
//
// Alphanumeric characters and '-', '_', '.', '~' are copied unchanged, a space
// becomes '+', and every other byte becomes '%' followed by the two digits
// that ToHex produces for its high and low nibble.
std::string UrlEncode(const std::string &str)
{
    std::string encoded;
    for (std::string::size_type pos = 0; pos != str.length(); ++pos)
    {
        const unsigned char byte = (unsigned char)str[pos];
        const bool keep_as_is = isalnum(byte) ||
                                byte == '-' || byte == '_' ||
                                byte == '.' || byte == '~';
        if (keep_as_is)
        {
            encoded += str[pos];
            continue;
        }
        if (byte == ' ')
        {
            encoded += "+";
            continue;
        }
        encoded += '%';
        encoded += ToHex(byte >> 4);
        encoded += ToHex(byte % 16);
    }
    return encoded;
}
// Reverses UrlEncode: '+' becomes a space, "%XY" becomes the byte whose
// hexadecimal value is XY (digits converted by FromHex), and every other
// character is copied unchanged.
//
// Precondition (checked only by assert): each '%' is followed by at least two
// more characters.
std::string UrlDecode(const std::string &str)
{
    std::string decoded;
    const size_t length = str.length();
    size_t pos = 0;
    while (pos < length)
    {
        const char c = str[pos];
        if (c == '%')
        {
            assert(pos + 2 < length);
            const unsigned char high = FromHex((unsigned char)str[pos + 1]);
            const unsigned char low = FromHex((unsigned char)str[pos + 2]);
            decoded += high * 16 + low;
            pos += 3;   // skip '%' and both hex digits
        }
        else
        {
            decoded += (c == '+') ? ' ' : c;
            ++pos;
        }
    }
    return decoded;
}
URL编码
C
#include <stdio.h>
#include <ctype.h>
/* Translation tables indexed by byte value (0..255). A non-zero entry is the
 * character to emit for that byte; a zero entry means the byte must be
 * percent-encoded. Both start zeroed and are filled in by main(). */
char rfc3986[256] = {0};
char html5[256] = {0};

/*
 * Percent-encodes the NUL-terminated string s into enc using table tb.
 *
 * For each input byte c: if tb[c] is non-zero that character is written,
 * otherwise "%XY" is written, where XY is the upper-case hexadecimal value
 * of c. The output is always NUL terminated, including for empty input.
 *
 * The caller is responsible for memory: enc must have room for
 * 3 * strlen(s) + 1 bytes, and tb must have 256 entries.
 */
void encode(const char *s, char *enc, char *tb)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    for (; *s; s++) {
        /* Use the unsigned value: a plain char may be negative for bytes
         * >= 0x80, which must not be used as a table index. */
        const unsigned char c = (unsigned char)*s;

        if (tb[c]) {
            *enc++ = tb[c];
        } else {
            *enc++ = '%';
            *enc++ = hex_digits[c >> 4];
            *enc++ = hex_digits[c & 0x0F];
        }
    }
    *enc = '\0';
}
/*
 * Demo: fills the rfc3986 and html5 translation tables, then prints the
 * sample URL encoded with the rfc3986 table.
 */
int main()
{
    const char url[] = "http://foo bar/";
    /* Worst case is three output bytes per input byte plus the terminating
     * NUL. sizeof(url) includes the NUL, so the size is a compile-time
     * constant and neither strlen() nor <string.h> is needed. */
    char enc[(sizeof(url) - 1) * 3 + 1];
    int i;

    for (i = 0; i < 256; i++) {
        /* rfc3986: alphanumerics and ~ - . _ pass through unchanged. */
        rfc3986[i] = isalnum(i)||i == '~'||i == '-'||i == '.'||i == '_'
            ? i : 0;
        /* html5: alphanumerics and * - . _ pass through; space maps to '+'. */
        html5[i] = isalnum(i)||i == '*'||i == '-'||i == '.'||i == '_'
            ? i : (i == ' ') ? '+' : 0;
    }

    encode(url, enc, rfc3986);
    puts(enc);
    return 0;
}
base64编码/解码
// The 64-symbol base64 alphabet; a symbol's index is its 6-bit value.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// Returns true when c is alphanumeric, '+' or '/'. The padding character '='
// is not accepted.
static inline bool is_base64(unsigned char c)
{
    if (c == '+' || c == '/')
    {
        return true;
    }
    return isalnum(c) != 0;
}
// Base64-encodes in_len bytes from bytes_to_encode into encoded_buffer.
//
// Every complete group of 3 input bytes yields 4 symbols from base64_chars.
// A trailing group of 1 or 2 bytes is zero-padded, yields 2 or 3 symbols, and
// is completed to 4 characters with '='. No terminator is written and the
// buffer size is not checked here.
//
// out_len is reset to 0 on entry and receives the number of characters
// written; the same value is returned.
unsigned int base64_encode(const unsigned char* bytes_to_encode, unsigned int in_len, unsigned char* encoded_buffer, unsigned int& out_len)
{
    out_len = 0;

    const unsigned char* cursor = bytes_to_encode;
    unsigned int remaining = in_len;

    // Complete 3-byte groups.
    for (; remaining >= 3; remaining -= 3, cursor += 3)
    {
        const unsigned char b0 = cursor[0];
        const unsigned char b1 = cursor[1];
        const unsigned char b2 = cursor[2];

        encoded_buffer[out_len++] = base64_chars[(b0 & 0xfc) >> 2];
        encoded_buffer[out_len++] = base64_chars[((b0 & 0x03) << 4) + ((b1 & 0xf0) >> 4)];
        encoded_buffer[out_len++] = base64_chars[((b1 & 0x0f) << 2) + ((b2 & 0xc0) >> 6)];
        encoded_buffer[out_len++] = base64_chars[b2 & 0x3f];
    }

    // Trailing 1 or 2 bytes: treat the missing bytes as zero.
    if (remaining != 0)
    {
        const unsigned char b0 = cursor[0];
        const unsigned char b1 = (remaining == 2) ? cursor[1] : static_cast<unsigned char>(0);

        unsigned char sextets[3];
        sextets[0] = static_cast<unsigned char>((b0 & 0xfc) >> 2);
        sextets[1] = static_cast<unsigned char>(((b0 & 0x03) << 4) + ((b1 & 0xf0) >> 4));
        sextets[2] = static_cast<unsigned char>((b1 & 0x0f) << 2);

        // remaining + 1 symbols carry data ...
        for (unsigned int k = 0; k < remaining + 1; ++k)
        {
            encoded_buffer[out_len++] = base64_chars[sextets[k]];
        }
        // ... and '=' fills the group up to 4 characters.
        for (unsigned int pad = remaining; pad < 3; ++pad)
        {
            encoded_buffer[out_len++] = '=';
        }
    }

    return out_len;
}
// Decodes base64 text from encoded_string into decoded_buffer.
//
// Input is consumed until in_len characters have been read, or until the
// first '=' or the first character rejected by is_base64. Every complete
// group of 4 symbols yields 3 bytes; a trailing group of k symbols
// (1 <= k <= 3) yields k - 1 bytes. The buffer size is not checked here.
//
// out_len is reset to 0 on entry and receives the number of bytes written;
// the same value is returned.
unsigned int base64_decode(const unsigned char* encoded_string, unsigned int in_len, unsigned char* decoded_buffer, unsigned int& out_len)
{
    unsigned char quad[4] = { 0, 0, 0, 0 };
    unsigned char triple[3] = { 0, 0, 0 };
    size_t pending = 0;   // symbols collected for the current group

    out_len = 0;

    for (unsigned int pos = 0; pos < in_len; ++pos)
    {
        const unsigned char symbol = encoded_string[pos];
        if (symbol == '=' || !is_base64(symbol))
        {
            break;
        }

        quad[pending++] = symbol;
        if (pending < 4)
        {
            continue;
        }

        // A full group: translate symbols to 6-bit values and repack.
        for (size_t k = 0; k < 4; ++k)
        {
            quad[k] = static_cast<unsigned char>(base64_chars.find(quad[k]));
        }
        triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
        triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
        triple[2] = ((quad[2] & 0x3) << 6) + quad[3];
        for (size_t k = 0; k < 3; ++k)
        {
            decoded_buffer[out_len++] = triple[k];
        }
        pending = 0;
    }

    if (pending != 0)
    {
        // Incomplete final group: clear the unused slots before translating.
        for (size_t k = pending; k < 4; ++k)
        {
            quad[k] = 0;
        }
        for (size_t k = 0; k < 4; ++k)
        {
            quad[k] = static_cast<unsigned char>(base64_chars.find(quad[k]));
        }
        triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
        triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
        triple[2] = ((quad[2] & 0x3) << 6) + quad[3];

        // Only pending - 1 bytes are backed by real input symbols.
        for (size_t k = 0; k + 1 < pending; ++k)
        {
            decoded_buffer[out_len++] = triple[k];
        }
    }

    return out_len;
}
C++
#include <QByteArray>
#include <iostream>
int main( ) {
QByteArray text ( "http://foo bar/" ) ;
QByteArray encoded( text.toPercentEncoding( ) ) ;
std::cout << encoded.data( ) << '\n' ;
return 0 ;
}
C#
using System;
namespace URLEncode
{
    /// <summary>
    /// Console demo that prints a sample URL after escaping it with
    /// <see cref="Uri.EscapeDataString(string)"/>.
    /// </summary>
    internal class Program
    {
        /// <summary>Entry point: encodes the sample URL and writes it to the console.</summary>
        private static void Main(string[] args)
        {
            string encoded = Encode("http://foo bar/");
            Console.WriteLine(encoded);
        }

        /// <summary>Returns <paramref name="uri"/> escaped by Uri.EscapeDataString.</summary>
        private static string Encode(string uri)
        {
            string escaped = Uri.EscapeDataString(uri);
            return escaped;
        }
    }
}
Go
package main
import (
"fmt"
"net/url"
)
// main prints the sample URL after escaping it with url.QueryEscape.
func main() {
	escaped := url.QueryEscape("http://foo bar/")
	fmt.Println(escaped)
}
Java
The built-in URLEncoder in Java converts the space " " into a plus sign "+" instead of "%20":
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
/**
 * Demo: prints a sample URL after encoding it with
 * {@link URLEncoder#encode(String, String)} using the "utf-8" charset name.
 */
public class Main
{
    /**
     * Encodes the sample URL and writes it to standard output.
     *
     * @param args unused
     * @throws UnsupportedEncodingException declared by URLEncoder.encode for an
     *         unsupported charset name
     */
    public static void main(String[] args) throws UnsupportedEncodingException
    {
        final String normal = "http://foo bar/";
        System.out.println(URLEncoder.encode(normal, "utf-8"));
    }
}
Python
import urllib

# quote() lives in urllib.parse on Python 3 and directly in urllib on
# Python 2; pick whichever this interpreter provides.
try:
    from urllib.parse import quote
except ImportError:
    quote = urllib.quote

s = 'http://foo/bar/'
# With the default safe='/', slashes are kept and ':' becomes '%3A'.
s = quote(s)
There is also urllib.quote_plus() (urllib.parse.quote_plus() in Python 3), which additionally encodes spaces as "+" signs.