马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
x
利用<iconv.h>来进行字符串编码的转换
#include <fcntl.h>
#include <iconv.h>
#include <string.h>
#include <unistd.h>

#include <cerrno>
#include <iostream>
#include <memory>
#include <vector>
- // 需要链接iconv库
- // iconv -l 命令可列出所有支持的格式
- // example: iconv将UTF-16转换为UTF-8
- // iconv -f UTF-16 -t UTF-8 myfile
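- // 注意: 不带字节序后缀的 "UTF-16" 由具体实现决定字节序: GNU libiconv 默认输出带 BOM 的 UTF-16 BE, 而 glibc 输出带 BOM 的本机字节序; 需要确定的字节序时请显式使用 UTF-16BE / UTF-16LE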
/**
 * Dump a byte buffer to stdout, one line per byte, formatted as
 * "<index>: <value>" where <value> is the byte as an unsigned number (0..255).
 *
 * @param str  Start of the buffer. Embedded NUL bytes are printed like any
 *             other byte; only `len` determines where the dump stops.
 * @param len  Number of bytes to print.
 */
void print_str_bytes(const char* str, size_t len) {
    // Index with size_t so the loop variable has the same type as `len`
    // (no signed/unsigned comparison, no overflow on large buffers).
    for (size_t i = 0; i < len; ++i) {
        // Convert through unsigned char so bytes >= 0x80 print as 128..255
        // regardless of whether plain char is signed on this platform.
        const int value = static_cast<int>(static_cast<unsigned char>(str[i]));
        std::cout << i << ": " << value << '\n';
    }
    // Flush once at the end instead of once per line.
    std::cout << std::flush;
}
/**
 * Convert a byte string from one character encoding to another using iconv.
 *
 * @param dst            Out: on success receives a buffer allocated with
 *                       `new char[]` holding the converted bytes. The caller
 *                       owns it and must release it with `delete[]`.
 *                       Left untouched on failure.
 * @param dst_len        Out: on success receives the number of bytes in *dst.
 *                       Left untouched on failure.
 * @param src            Input bytes encoded as `from_encoding`.
 * @param src_len        Number of input bytes.
 * @param to_encoding    Target encoding name as understood by iconv_open().
 * @param from_encoding  Source encoding name as understood by iconv_open().
 * @return 0 on success, -1 on any failure (a diagnostic is written to stderr).
 */
int convert_encoding(char** dst, size_t* dst_len, const char* src, size_t src_len, const char* to_encoding, const char* from_encoding) {
    if (dst == nullptr || dst_len == nullptr || (src == nullptr && src_len != 0)) {
        std::cerr << "convert_encoding error: invalid argument" << std::endl;
        return -1;
    }

    iconv_t cd = iconv_open(to_encoding, from_encoding);
    if (cd == reinterpret_cast<iconv_t>(-1)) {
        std::cerr << "iconv_open error: " << strerror(errno) << std::endl;
        return -1;
    }

    // iconv() takes a non-const char** for historical reasons; the cast only
    // satisfies the prototype.
    char* in = const_cast<char*>(src);
    size_t in_left = src_len;

    // Initial guess for the output size. The constant keeps the capacity
    // non-zero (so doubling always grows) and leaves room for a BOM.
    std::vector<char> buf(2 * src_len + 16);
    size_t used = 0;         // bytes of converted output currently in buf
    bool flushing = false;   // false: converting input, true: final reset call

    for (;;) {
        // Recompute the cursor from `used` every round: buf may have been
        // reallocated, and iconv advances the pointer it is given.
        char* out = buf.data() + used;
        size_t out_left = buf.size() - used;

        // A call with a null input asks iconv to emit whatever is needed to
        // return to the initial shift state (matters for stateful encodings).
        const size_t rc = flushing ? iconv(cd, nullptr, nullptr, &out, &out_left)
                                   : iconv(cd, &in, &in_left, &out, &out_left);
        const int err = errno;
        used = buf.size() - out_left;

        if (rc != static_cast<size_t>(-1)) {
            if (flushing) {
                break;
            }
            flushing = true;
            continue;
        }

        if (err == E2BIG) {
            // Output buffer full: grow it and resume where iconv stopped.
            // `in`/`in_left` already point at the unconverted remainder and
            // the bytes produced so far stay in place.
            buf.resize(buf.size() * 2);
            continue;
        }

        std::cerr << "iconv error: " << strerror(err) << std::endl;
        iconv_close(cd);
        return -1;
    }

    std::cout << "use bytes: " << used << std::endl;

    if (iconv_close(cd) == -1) {
        std::cerr << "iconv_close error: " << strerror(errno) << std::endl;
        return -1;
    }

    // Publish the result only after every step succeeded, so a -1 return
    // never leaves the caller holding a buffer it does not know about.
    char* result = new char[used];
    memcpy(result, buf.data(), used);
    *dst = result;
    *dst_len = used;
    return 0;
}
- int main(int argc, char* argv[]) {
- if (argc != 2) {
- std::cout << "./iconv <src-str>" << std::endl;
- return 0;
- }
- char* str = argv[1];
- print_str_bytes(str, strlen(str));
-
- char* dst = nullptr;
- size_t dst_len = 0;
- int res = convert_encoding(&dst, &dst_len, str, strlen(str), "UTF-16", "UTF-8");
- if (res == -1) {
- std::cerr << "oops..." << std::endl;
- exit(-1);
- }
- std::cout << "dst_len: " << dst_len << std::endl;
- print_str_bytes(dst, dst_len);
- // 写入到文件
- int fd = open("out.txt", O_RDWR| O_CREAT | O_TRUNC, S_IRWXU);
- if (fd == -1) {
- std::cerr << "open out.txt error: " << strerror(errno) << std::endl;
- exit(-1);
- }
- write(fd, dst, dst_len);
- return 0;
- }
编译(注意 -liconv 要放在源文件之后,否则部分链接器无法解析 iconv 符号;使用 glibc 的 Linux 上 iconv 已包含在 libc 中,可省略 -liconv):
- c++ -std=c++14 iconv.cpp -o iconv -liconv
输出:
- ./iconv 你hao,世界
- 0: 228
- 1: 189
- 2: 160
- 3: 104
- 4: 97
- 5: 111
- 6: 239
- 7: 188
- 8: 140
- 9: 228
- 10: 184
- 11: 150
- 12: 231
- 13: 149
- 14: 140
- use bytes: 16
- dst_len: 16
- 0: 254
- 1: 255
- 2: 79
- 3: 96
- 4: 0
- 5: 104
- 6: 0
- 7: 97
- 8: 0
- 9: 111
- 10: 255
- 11: 12
- 12: 78
- 13: 22
- 14: 117
- 15: 76
利用 iconv -l 命令可以列出当前系统支持的所有编码格式。
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |