如何判断一个文件的编码是GBK还是UTF-8?
- 内容介绍
- 文章标签
- 相关推荐
本文共计352个文字,预计阅读时间需要2分钟。
本文提供一个 Java 工具方法:输入文件(File)或文件路径(String),输出判断得到的编码格式("GBK" 或 "UTF-8")。
package net.vicp.fyhui.van.util;
import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.BufferedReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Guesses whether a text file is encoded as GBK or UTF-8.
 *
 * <p>Input is a file or a file path; output is the name of the encoding.
 */
public class CodeType {

    public CodeType() {
    }

    /**
     * Guesses the encoding of a file by decoding it twice, once as GBK and once as UTF-8,
     * and comparing how many characters each decoding produces.
     *
     * <p>The file is read line by line through both decoders in lockstep and the lines
     * (without their terminators) are accumulated. The decoding that yields the shorter
     * text wins. Ties resolve to {@code "UTF-8"}, so an empty file or a file whose two
     * decodings are identical (for example plain ASCII) is reported as UTF-8.
     *
     * <p>This is a heuristic, not a validation of the byte stream; the original author
     * describes it as accurate when the file contains Chinese text.
     *
     * @param object the file to inspect, either a {@link File} or a {@code String} path
     * @return {@code "GBK"} or {@code "UTF-8"}
     * @throws IllegalArgumentException if {@code object} is {@code null} or is neither a
     *         {@code File} nor a {@code String}
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException if reading the file fails
     */
    public static String whichEncoding(Object object) throws IOException, FileNotFoundException {
        final File file;
        if (object instanceof File) {
            file = (File) object;
        } else if (object instanceof String) {
            file = new File((String) object);
        } else {
            // Fail with a clear message instead of a NullPointerException from a null reader.
            throw new IllegalArgumentException(
                    "object must be a java.io.File or a String path, but was "
                            + (object == null ? "null" : object.getClass().getName()));
        }

        // Every stream is declared as its own resource so all of them are closed on any exit path.
        try (FileInputStream gbkBytes = new FileInputStream(file);
             FileInputStream utf8Bytes = new FileInputStream(file);
             BufferedReader gbkReader = new BufferedReader(new InputStreamReader(gbkBytes, "GBK"));
             BufferedReader utf8Reader = new BufferedReader(new InputStreamReader(utf8Bytes, "UTF-8"))) {

            StringBuilder gbkText = new StringBuilder();
            StringBuilder utf8Text = new StringBuilder();
            String gbkLine;
            String utf8Line;

            // Read both decodings in lockstep; stop as soon as either one runs out of lines.
            while ((gbkLine = gbkReader.readLine()) != null
                    && (utf8Line = utf8Reader.readLine()) != null) {
                gbkText.append(gbkLine);
                utf8Text.append(utf8Line);

                // Same length but different content: stop reading early. The equal lengths
                // then make the comparison below answer "UTF-8".
                // NOTE(review): the first three characters are skipped, presumably to ignore
                // a leading byte-order mark - confirm.
                if (gbkText.length() == utf8Text.length()
                        && gbkText.length() > 2
                        && !gbkText.substring(3).equals(utf8Text.substring(3))) {
                    break;
                }
            }

            // The shorter decoding is taken as the correct one; equal lengths mean UTF-8.
            return gbkText.length() < utf8Text.length() ? "GBK" : "UTF-8";
        }
    }
}
本文共计352个文字,预计阅读时间需要2分钟。
本文提供一个 Java 工具方法:输入文件(File)或文件路径(String),输出判断得到的编码格式("GBK" 或 "UTF-8")。
package net.vicp.fyhui.van.util;
import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.BufferedReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Guesses whether a text file is encoded as GBK or UTF-8.
 *
 * <p>Input is a file or a file path; output is the name of the encoding.
 */
public class CodeType {

    public CodeType() {
    }

    /**
     * Guesses the encoding of a file by decoding it twice, once as GBK and once as UTF-8,
     * and comparing how many characters each decoding produces.
     *
     * <p>The file is read line by line through both decoders in lockstep and the lines
     * (without their terminators) are accumulated. The decoding that yields the shorter
     * text wins. Ties resolve to {@code "UTF-8"}, so an empty file or a file whose two
     * decodings are identical (for example plain ASCII) is reported as UTF-8.
     *
     * <p>This is a heuristic, not a validation of the byte stream; the original author
     * describes it as accurate when the file contains Chinese text.
     *
     * @param object the file to inspect, either a {@link File} or a {@code String} path
     * @return {@code "GBK"} or {@code "UTF-8"}
     * @throws IllegalArgumentException if {@code object} is {@code null} or is neither a
     *         {@code File} nor a {@code String}
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException if reading the file fails
     */
    public static String whichEncoding(Object object) throws IOException, FileNotFoundException {
        final File file;
        if (object instanceof File) {
            file = (File) object;
        } else if (object instanceof String) {
            file = new File((String) object);
        } else {
            // Fail with a clear message instead of a NullPointerException from a null reader.
            throw new IllegalArgumentException(
                    "object must be a java.io.File or a String path, but was "
                            + (object == null ? "null" : object.getClass().getName()));
        }

        // Every stream is declared as its own resource so all of them are closed on any exit path.
        try (FileInputStream gbkBytes = new FileInputStream(file);
             FileInputStream utf8Bytes = new FileInputStream(file);
             BufferedReader gbkReader = new BufferedReader(new InputStreamReader(gbkBytes, "GBK"));
             BufferedReader utf8Reader = new BufferedReader(new InputStreamReader(utf8Bytes, "UTF-8"))) {

            StringBuilder gbkText = new StringBuilder();
            StringBuilder utf8Text = new StringBuilder();
            String gbkLine;
            String utf8Line;

            // Read both decodings in lockstep; stop as soon as either one runs out of lines.
            while ((gbkLine = gbkReader.readLine()) != null
                    && (utf8Line = utf8Reader.readLine()) != null) {
                gbkText.append(gbkLine);
                utf8Text.append(utf8Line);

                // Same length but different content: stop reading early. The equal lengths
                // then make the comparison below answer "UTF-8".
                // NOTE(review): the first three characters are skipped, presumably to ignore
                // a leading byte-order mark - confirm.
                if (gbkText.length() == utf8Text.length()
                        && gbkText.length() > 2
                        && !gbkText.substring(3).equals(utf8Text.substring(3))) {
                    break;
                }
            }

            // The shorter decoding is taken as the correct one; equal lengths mean UTF-8.
            return gbkText.length() < utf8Text.length() ? "GBK" : "UTF-8";
        }
    }
}

