java自动识别文件编码格式UTF

发布时间:2024-12-10 21:35

编程中避免硬编码,利用配置文件替代 #生活知识# #生活经验# #编程#

最新推荐文章于 2024-10-22 12:41:37 发布

guying4875 于 2018-07-13 16:53:58 发布

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。

背景:在解读properties配置文件时windows操作系统编辑过的内容上传后总是无法通过键获取文件中内容,讲过分析是文件的编码格式为UTF-8带BOM的,因此通过该程序获取文件编码格式

import java.io.BufferedInputStream;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.InputStreamReader;

import java.io.OutputStreamWriter;

import java.io.Writer;

import java.util.BitSet;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

public class EncodeUtil {

private static Logger logger = LoggerFactory.getLogger(EncodeUtil.class);

private static int BYTE_SIZE = 8;

public static String CODE_UTF8 = "UTF-8";

public static String CODE_UTF8_BOM = "UTF-8_BOM";

public static String CODE_GBK = "GBK";

public static String getEncode(String fullFileName, boolean ignoreBom) throws Exception {

logger.debug("fullFileName ; {}", fullFileName);

BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName));

return getEncode(bis, ignoreBom);

}

public static String getEncode(BufferedInputStream bis, boolean ignoreBom) throws Exception {

bis.mark(0);

String encodeType = "未识别";

byte[] head = new byte[3];

bis.read(head);

if (head[0] == -1 && head[1] == -2) {

encodeType = "UTF-16";

} else if (head[0] == -2 && head[1] == -1) {

encodeType = "Unicode";

} else if (head[0] == -17 && head[1] == -69 && head[2] == -65) {

if (ignoreBom) {

encodeType = CODE_UTF8;

} else {

encodeType = CODE_UTF8_BOM;

}

} else if ("Unicode".equals(encodeType)) {

encodeType = "UTF-16";

} else if (isUTF8(bis)) {

encodeType = CODE_UTF8;

} else {

encodeType = CODE_GBK;

}

logger.info("result encode type : " + encodeType);

return encodeType;

}

private static boolean isUTF8( BufferedInputStream bis) throws Exception {

bis.reset();

int code = bis.read();

do {

BitSet bitSet = convert2BitSet(code);

if (bitSet.get(0)) {

if (!checkMultiByte(bis, bitSet)) {

return false;

}

} else {

}

code = bis.read();

} while (code != -1);

return true;

}

private static boolean checkMultiByte(BufferedInputStream bis, BitSet bitSet) throws Exception {

int count = getCountOfSequential(bitSet);

byte[] bytes = new byte[count - 1];

bis.read(bytes);

for (byte b : bytes) {

if (!checkUtf8Byte(b)) {

return false;

}

}

return true;

}

private static boolean checkUtf8Byte(byte b) throws Exception {

BitSet bitSet = convert2BitSet(b);

return bitSet.get(0) && !bitSet.get(1);

}

private static int getCountOfSequential( BitSet bitSet) {

int count = 0;

for (int i = 0; i < BYTE_SIZE; i++) {

if (bitSet.get(i)) {

count++;

} else {

break;

}

}

return count;

}

private static BitSet convert2BitSet(int code) {

BitSet bitSet = new BitSet(BYTE_SIZE);

for (int i = 0; i < BYTE_SIZE; i++) {

int tmp3 = code >> (BYTE_SIZE - i - 1);

int tmp2 = 0x1 & tmp3;

if (tmp2 == 1) {

bitSet.set(i);

}

}

return bitSet;

}

public static void convert(String oldFullFileName, String oldCharsetName, String newFullFileName, String newCharsetName) throws Exception {

logger.info("the old file name is : {}, The oldCharsetName is : {}", oldFullFileName, oldCharsetName);

logger.info("the new file name is : {}, The newCharsetName is : {}", newFullFileName, newCharsetName);

StringBuffer content = new StringBuffer();

BufferedReader bin = new BufferedReader(new InputStreamReader(new FileInputStream(oldFullFileName), oldCharsetName));

String line;

while ((line = bin.readLine()) != null) {

content.append(line);

content.append(System.getProperty("line.separator"));

}

newFullFileName = newFullFileName.replace("\\", "/");

File dir = new File(newFullFileName.substring(0, newFullFileName.lastIndexOf("/")));

if (!dir.exists()) {

dir.mkdirs();

}

Writer out = new OutputStreamWriter(new FileOutputStream(newFullFileName), newCharsetName);

out.write(content.toString());

}

}

调用方式

public static void main(String[] args) {

try {

String filePath ="/home/zhanghy/文档/国寿/实时计算paas/ee/error/分/config/function_list.properties";

BufferedInputStream bis = new BufferedInputStream(new FileInputStream(filePath));

String code = EncodeUtil.getEncode(bis, false);

                        System.out.println("文件编码格式为:"+code);

} catch (Exception e) {

e.printStackTrace();

}

}


网址:java自动识别文件编码格式UTF https://www.yuejiaxmz.com/news/view/437311

相关内容

Java中String和byte[]间的转换
Python中GBK, UTF
Android studio 中文乱码的解决办法
A =Java基础与源码
VCODE修改控制台编码格式解决输出乱码问题
Python中关于coding=utf
基于Java的校园二手书交易平台设计与实现(源码+lw+部署文档+讲解等)
401基于java ssm springboot课外学习生活活动平台系统班级活动报名班级管理(源码+文档+运行视频+讲解视频)
mysql 连接url中useUnicode=true&characterEncoding=UTF
基于java+springboot+vue的二手旧物置换网站(源码+LW+部署讲解)

随便看看