如何高效实现GBK与UTF-8编码互转及添加删除BOM,为Easy CHM合成前的文件转码改写?
- 内容介绍
- 文章标签
- 相关推荐
本文共计725个文字,预计阅读时间需要3分钟。
java
package linwancheng.charset;
import java.io.File;import java.io.FileFilter;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.BufferedReader;import java.io.BufferedWriter;
package linwancheng.charset;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
/**
* 文件转码工具类,支持增删 UTF-8 的 BOM,添加HTML4标签兼容旧CHM阅读器
*
* @param filter
* 过滤器,可为空,或new NameEnd(拓展名...)
* @param srcPath
* @param descPath
* @param inCharset
* @param outCharset
* @param addH4charset
* 是否添加HTML4标签兼容旧CHM阅读器,true/false
* @param addBOM
* +1 添加BOM,-1 删除,0 不变
*
* @author linWanCheng
* @version 2.0
*/
public class CharsetUnits {
/**
 * The Unicode byte order mark (U+FEFF); a UTF-8 encoder writes it as the
 * bytes EF BB BF.
 * <p>
 * Spelled as a character escape instead of being decoded from raw bytes:
 * {@code new String(byte[])} decodes with the platform default charset, so the
 * previous form only produced U+FEFF when that default happened to be UTF-8.
 */
static final String BOM = "\uFEFF";
/**
 * Transcodes "srcfile" to "descfile" without a BOM.
 * <p>
 * Shorthand for the seven-argument overload with the paths fixed to
 * {@code "srcfile"} and {@code "descfile"}, {@code addH4charset} set to
 * {@code false} and {@code addBOM} set to {@code -1}.
 *
 * @param filter     file filter, forwarded unchanged
 * @param inCharset  charset name of the input, forwarded unchanged
 * @param outCharset charset name of the output, forwarded unchanged
 */
public static void src2desc(FileFilter filter, String inCharset, String outCharset) {
    final String srcPath = "srcfile";
    final String descPath = "descfile";
    final boolean addH4charset = false;
    final int addBOM = -1;
    src2desc(filter, srcPath, descPath, inCharset, outCharset, addH4charset, addBOM);
}
/**
 * Transcodes "srcfile" to "descfile".
 * <p>
 * Shorthand for the seven-argument overload with the paths fixed to
 * {@code "srcfile"} and {@code "descfile"}; every other argument is forwarded
 * unchanged.
 *
 * @param filter       file filter, forwarded unchanged
 * @param inCharset    charset name of the input, forwarded unchanged
 * @param outCharset   charset name of the output, forwarded unchanged
 * @param addH4charset forwarded unchanged
 * @param addBOM       forwarded unchanged
 */
public static void src2desc(FileFilter filter, String inCharset, String outCharset, boolean addH4charset,
        int addBOM) {
    final String srcPath = "srcfile";
    final String descPath = "descfile";
    src2desc(filter, srcPath, descPath, inCharset, outCharset, addH4charset, addBOM);
}
/** 文件夹转码 */
public static void src2desc(FileFilter filter, String srcPath, String descPath, String inCharset,
String outCharset, boolean addH4charset, int addBOM) {
ArrayList
package linwancheng.charset;

import org.junit.Test;

/**
 * Test cases that drive the transcoding utility.
 * <p>
 * Requires the JUnit unit-test library. In MyEclipse, run a case by clicking
 * its method name.
 */
public class CharsetTest {

    private static final String GBK = "GBK";
    private static final String UTF_8 = "UTF-8";

    /** {@code addBOM} value passed by the cases that strip the BOM. */
    private static final int REMOVE_BOM = -1;
    /** {@code addBOM} value passed by the case that adds a BOM. */
    private static final int ADD_BOM = 1;

    /**
     * UTF-8 to UTF-8 with the BOM removed; used when moving files from
     * MyEclipse to IDEA.
     */
    @Test
    public void DelBOM() {
        CharsetUnits.src2desc(null, UTF_8, UTF_8, false, REMOVE_BOM);
    }

    /** GBK to UTF-8, no filter. */
    @Test
    public void GBKToUTF8() {
        CharsetUnits.src2desc(null, GBK, UTF_8, false, REMOVE_BOM);
    }

    /**
     * GBK to UTF-8 for "htm"/"html" names, adding a BOM before the files are
     * compiled with EasyCHM.
     */
    @Test
    public void ForEasyCHM() {
        CharsetUnits.src2desc(new NameEnd("htm", "html"), GBK, UTF_8, true, ADD_BOM);
    }

    /** UTF-8 to GBK, no filter. */
    @Test
    public void UTF8ToGBK() {
        CharsetUnits.src2desc(null, UTF_8, GBK, false, REMOVE_BOM);
    }
}
本文共计725个文字,预计阅读时间需要3分钟。
java
package linwancheng.charset;
import java.io.File;import java.io.FileFilter;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.BufferedReader;import java.io.BufferedWriter;
package linwancheng.charset;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
/**
* 文件转码工具类,支持增删 UTF-8 的 BOM,添加HTML4标签兼容旧CHM阅读器
*
* @param filter
* 过滤器,可为空,或new NameEnd(拓展名...)
* @param srcPath
* @param descPath
* @param inCharset
* @param outCharset
* @param addH4charset
* 是否添加HTML4标签兼容旧CHM阅读器,true/false
* @param addBOM
* +1 添加BOM,-1 删除,0 不变
*
* @author linWanCheng
* @version 2.0
*/
public class CharsetUnits {
/**
 * The Unicode byte order mark (U+FEFF); a UTF-8 encoder writes it as the
 * bytes EF BB BF.
 * <p>
 * Spelled as a character escape instead of being decoded from raw bytes:
 * {@code new String(byte[])} decodes with the platform default charset, so the
 * previous form only produced U+FEFF when that default happened to be UTF-8.
 */
static final String BOM = "\uFEFF";
/**
 * Transcodes "srcfile" to "descfile" without a BOM.
 * <p>
 * Shorthand for the seven-argument overload with the paths fixed to
 * {@code "srcfile"} and {@code "descfile"}, {@code addH4charset} set to
 * {@code false} and {@code addBOM} set to {@code -1}.
 *
 * @param filter     file filter, forwarded unchanged
 * @param inCharset  charset name of the input, forwarded unchanged
 * @param outCharset charset name of the output, forwarded unchanged
 */
public static void src2desc(FileFilter filter, String inCharset, String outCharset) {
    final String srcPath = "srcfile";
    final String descPath = "descfile";
    final boolean addH4charset = false;
    final int addBOM = -1;
    src2desc(filter, srcPath, descPath, inCharset, outCharset, addH4charset, addBOM);
}
/**
 * Transcodes "srcfile" to "descfile".
 * <p>
 * Shorthand for the seven-argument overload with the paths fixed to
 * {@code "srcfile"} and {@code "descfile"}; every other argument is forwarded
 * unchanged.
 *
 * @param filter       file filter, forwarded unchanged
 * @param inCharset    charset name of the input, forwarded unchanged
 * @param outCharset   charset name of the output, forwarded unchanged
 * @param addH4charset forwarded unchanged
 * @param addBOM       forwarded unchanged
 */
public static void src2desc(FileFilter filter, String inCharset, String outCharset, boolean addH4charset,
        int addBOM) {
    final String srcPath = "srcfile";
    final String descPath = "descfile";
    src2desc(filter, srcPath, descPath, inCharset, outCharset, addH4charset, addBOM);
}
/** 文件夹转码 */
public static void src2desc(FileFilter filter, String srcPath, String descPath, String inCharset,
String outCharset, boolean addH4charset, int addBOM) {
ArrayList
package linwancheng.charset;

import org.junit.Test;

/**
 * Test cases that drive the transcoding utility.
 * <p>
 * Requires the JUnit unit-test library. In MyEclipse, run a case by clicking
 * its method name.
 */
public class CharsetTest {

    private static final String GBK = "GBK";
    private static final String UTF_8 = "UTF-8";

    /** {@code addBOM} value passed by the cases that strip the BOM. */
    private static final int REMOVE_BOM = -1;
    /** {@code addBOM} value passed by the case that adds a BOM. */
    private static final int ADD_BOM = 1;

    /**
     * UTF-8 to UTF-8 with the BOM removed; used when moving files from
     * MyEclipse to IDEA.
     */
    @Test
    public void DelBOM() {
        CharsetUnits.src2desc(null, UTF_8, UTF_8, false, REMOVE_BOM);
    }

    /** GBK to UTF-8, no filter. */
    @Test
    public void GBKToUTF8() {
        CharsetUnits.src2desc(null, GBK, UTF_8, false, REMOVE_BOM);
    }

    /**
     * GBK to UTF-8 for "htm"/"html" names, adding a BOM before the files are
     * compiled with EasyCHM.
     */
    @Test
    public void ForEasyCHM() {
        CharsetUnits.src2desc(new NameEnd("htm", "html"), GBK, UTF_8, true, ADD_BOM);
    }

    /** UTF-8 to GBK, no filter. */
    @Test
    public void UTF8ToGBK() {
        CharsetUnits.src2desc(null, UTF_8, GBK, false, REMOVE_BOM);
    }
}

