Java處理UTF-8帶BOM的文本的讀寫 - 網頁設計教學

文章推薦指數: 80 %
投票人數:10人

BOM(byte-order mark),即字節順序標記,它是插入到以UTF-8、UTF-16或UTF-32編碼的Unicode文件開頭的特殊標記,用來識別Unicode文件的編碼類型。

跳至主要內容

什麼是BOM

BOM(byte-order mark),即字節順序標記,它是插入到以UTF-8、UTF-16或UTF-32編碼的Unicode文件開頭的特殊標記,用來識別Unicode文件的編碼類型。

對於UTF-8來說,BOM並不是必須的:BOM主要用來標記以多字節為編碼單元的文件(如UTF-16、UTF-32)的編碼類型和字節順序(big-endian或little-endian),而UTF-8以單個字節為編碼單元,不存在字節順序問題,BOM在其中只起到標識編碼的作用。

  BOMs文件頭:

    0000FEFF = UTF-32, big-endian
    FFFE0000 = UTF-32, little-endian
    EFBBBF   = UTF-8
    FEFF     = UTF-16, big-endian
    FFFE     = UTF-16, little-endian

  下面舉個例子,針對UTF-8的文件BOM做個處理:

    String xmla = StringFileToolkit.file2String(new File("D:\\projects\\mailpost\\src\\a.xml"), "UTF-8");
    byte[] b = xmla.getBytes("UTF-8");
    String xml = new String(b, 3, b.length - 3, "UTF-8");
    …………..

  思路是:先按照UTF-8編碼讀取文件後,再轉回字節數組並跳過前三個字節(即UTF-8的BOM:EF BB BF),重新構建一個新的字符串,然後用Dom4j解析處理,這樣就不會報錯了。

  其他編碼方式的處理思路類似,其實可以寫一個能自動識別BOM的通用工具,去掉BOM信息後返回字符串。

  不過這個處理過程已經有牛人解決過了:https://koti.mbnet.fi/akini/java/unicodereader/

  Java代碼

  Example code using UnicodeReader class

  Here is an example method to read text file. It will recognize bom marker and skip it while reading.

    // import https://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txt
    public static char[] loadFile(String file) throws IOException {
        // read text file, auto recognize bom marker or use
        // system default if markers not found.
        BufferedReader reader = null;
        CharArrayWriter writer = null;
        UnicodeReader r = new UnicodeReader(new FileInputStream(file), null);

        char[] buffer = new char[16 * 1024];  // 16k buffer
        int read;
        try {
            reader = new BufferedReader(r);
            writer = new CharArrayWriter();
            while ((read = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, read);
            }
            writer.flush();
            return writer.toCharArray();
        } catch (IOException ex) {
            throw ex;
        } finally {
            try {
                writer.close(); reader.close(); r.close();
            } catch (Exception ex) {}
        }
    }

  Java代碼

  Example code to write UTF-8 with bom marker

  Write bom marker bytes to start of empty file and all proper text editors have no problems using a correct charset while reading files. Java's OutputStreamWriter does not write utf8 bom marker bytes.
    public static void saveFile(String file, String data, boolean append) throws IOException {
        BufferedWriter bw = null;
        OutputStreamWriter osw = null;

        File f = new File(file);
        FileOutputStream fos = new FileOutputStream(f, append);
        try {
            // write UTF8 BOM mark if file is empty
            if (f.length() < 1) {
                final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
                fos.write(bom);
            }

            osw = new OutputStreamWriter(fos, "UTF-8");
            bw = new BufferedWriter(osw);
            if (data != null) bw.write(data);
        } catch (IOException ex) {
            throw ex;
        } finally {
            try { bw.close(); fos.close(); } catch (Exception ex) {}
        }
    }

  實際應用:

  Java代碼

    package com.dayo.gerber;

    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.io.Reader;
    import java.util.Properties;

    /**
     *
     * @author 劉飛(liufei)
     *
     */
    public class Generate4YYQTPScript {

        private static final String ENCODING = "UTF-8";
        private static final String GERBER_CONFIG = "config/gerber4yy.properties";

        private static Properties GERBER_CONFIG_PROPS = null;
        private static final String GERBER_FORMAT_DIALOG_TITLE_SCRIPT = "{#GERBER_FORMAT_DIALOG_TITLE}";
        private static String GERBER_FORMAT_DIALOG_TITLE = "";

        /* gerber properties parameters keys config */
        private static final String QTP_SCRIPT_IN = "script.in";

        private static final String QTP_SCRIPT_OUT = "script.out";

        private static final String QTP_SYSTEM_PATH = "QTP.system.path";
        private static final String QTP_SYSTEM_PATH_SCRIPT = "{#QTPSYSTEMPATH}";

        private static final String GERBER_FILE_DRIVER_PATH = "gerber.file.driver.path";
        private static final String GERBER_FILE_DRIVER_PATH_SCRIPT = "{#driver}";

        private static final String GERBER_FILE_DRIVER = "gerber.file.driver";
        private static final String GERBER_FILE_DRIVER_SCRIPT = "{#dr}";

        private static final String GERBER_FILE_DIR = "gerber.file.dir";
        private static final String GERBER_FILE_DIR_SCRIPT = "{#dirName}";

        private static final String GERBER_FILE = "gerber.file";
        private static final String GERBER_FILE_SCRIPT = "{#fileName}";

        private static final String GERBER_OUT = "gerber.out";
        private static final String GERBER_OUT_SCRIPT = "{#gerberout}";

        private static final String VB_EXE_PATH = "vb.exe.path";

        /* bigBoard props */
        private static final String LEAGUE_BOARD_NUM_SCRIPT = "{#LEAGUE_BOARD_NUM}";
        private static final String WIDTH_SCRIPT = "{#WIDTH}";
        private static final String P_SCRIPT = "{#P}";
        private static final String DY_SCRIPT = "{#DY}";

        private Properties BIGBOARD_PROPS = null;

        public Generate4YYQTPScript(Properties bigboard_props) {
            super();
            BIGBOARD_PROPS = bigboard_props;

            try {
                GERBER_CONFIG_PROPS = ConfigHelper
                        .getConfigProperties(GERBER_CONFIG);
                GERBER_FORMAT_DIALOG_TITLE = GERBER_CONFIG_PROPS.getProperty(
                        GERBER_FILE_DRIVER).trim().toUpperCase()
                        + "\\"
                        + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR).trim()
                                .toUpperCase()
                        + "\\"
                        + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE).trim()
                                .toUpperCase();
                GERBER_FORMAT_DIALOG_TITLE = GERBER_FORMAT_DIALOG_TITLE.substring(0, 17);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        public static void main(String[] args) throws IOException {
            Properties bigboard_props = new Properties();
            bigboard_props.setProperty("{#LEAGUE_BOARD_NUM}", String.valueOf(4));
            bigboard_props.setProperty("{#WIDTH}", String.valueOf(new Double("54")));
            bigboard_props.setProperty("{#P}", String.valueOf(new Double("2")));
            bigboard_props.setProperty("{#DY}", String.valueOf(new Double("0.00")));

            Generate4YYQTPScript generateQTPScript = new Generate4YYQTPScript(bigboard_props);
            generateQTPScript.generateQTPScript();
    //      RuntimeUtil.getInstance().run(generateQTPScript.getVBEXE(), 1, 50000);
        }

        public String getCheckOutFilePath() {
            return GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DRIVER).trim() + "/"
                    + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR).trim();
        }

        public String getSavePath() {
            return GERBER_CONFIG_PROPS.getProperty(GERBER_OUT);
        }

        public String getVBEXE() {
            return GERBER_CONFIG_PROPS.getProperty(VB_EXE_PATH);
        }

        /**
         * Generate QTP Script
         *
         * @return
         * @throws IOException
         */
        public File generateQTPScript() throws IOException {
            return generateQTPScript(GERBER_CONFIG_PROPS
                    .getProperty(QTP_SCRIPT_OUT), GERBER_CONFIG_PROPS
                    .getProperty(QTP_SCRIPT_IN));
        }

        /**
         * set value to script
         *
         * @param source
         * @return
         * @throws IOException
         */
        private String scriptConvey(String source) throws IOException {
            String _source = source;
            _source = this.replace(this.replace(this.replace(
                    this.replace(this.replace(this.replace(this.replace(
                            _source,
                            GERBER_FORMAT_DIALOG_TITLE_SCRIPT,
                            GERBER_FORMAT_DIALOG_TITLE), GERBER_FILE_SCRIPT,
                            GERBER_CONFIG_PROPS.getProperty(GERBER_FILE)),
                            GERBER_FILE_DRIVER_SCRIPT, GERBER_CONFIG_PROPS
                                    .getProperty(GERBER_FILE_DRIVER)),
                            GERBER_OUT_SCRIPT, GERBER_CONFIG_PROPS
                                    .getProperty(GERBER_OUT)),
                    GERBER_FILE_DIR_SCRIPT, GERBER_CONFIG_PROPS
                            .getProperty(GERBER_FILE_DIR)),
                    GERBER_FILE_DRIVER_PATH_SCRIPT, GERBER_CONFIG_PROPS
                            .getProperty(GERBER_FILE_DRIVER_PATH)),
                    QTP_SYSTEM_PATH_SCRIPT, GERBER_CONFIG_PROPS
                            .getProperty(QTP_SYSTEM_PATH));

            if (this.BIGBOARD_PROPS != null) {
                _source = this.replace(this.replace(this.replace(
                        _source,
                        DY_SCRIPT, this.BIGBOARD_PROPS.getProperty(DY_SCRIPT)),
                        WIDTH_SCRIPT, this.BIGBOARD_PROPS
                                .getProperty(WIDTH_SCRIPT)),
                        LEAGUE_BOARD_NUM_SCRIPT, this.BIGBOARD_PROPS
                                .getProperty(LEAGUE_BOARD_NUM_SCRIPT));

                _source = this.replace(_source, P_SCRIPT, this.BIGBOARD_PROPS.getProperty(P_SCRIPT));
            }

            return _source;
        }

        /**
         * Generate QTP Script
         *
         * @param target
         *            target file
         * @param source
         *            source file
         * @throws IOException
         */
        public File generateQTPScript(File target, File source) throws IOException {
            return generateQTPScript(target.getAbsolutePath(), source
                    .getAbsolutePath());
        }

        /**
         * Generate QTP Script
         *
         * @param target
         *            target file path
         * @param source
         *            source file path
         * @return
         * @throws IOException
         */
        public File generateQTPScript(String target, String source)
                throws IOException {
            File f = new File(target);
            if (!f.exists()) {
                f.getParentFile().mkdirs();
                try {
                    f.createNewFile();
                } catch (Exception e) {
                }
            }
            FileOutputStream fos = null;
            OutputStreamWriter osw = null;
            BufferedWriter bw = null;
            try {
                final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
                fos = new FileOutputStream(f);
                osw = new OutputStreamWriter(fos, ENCODING);
                bw = new BufferedWriter(osw);
                fos.write(bom);
                bw.write(scriptConvey(getSourceFileContentReader(source)));

                bw.flush();
                bw.close();
                return f;
            } catch (IOException e) {
                throw e;
            }
        }

        /**
         * Reader convey to string
         *
         * @param source
         * @return
         * @throws IOException
         */
        private String reader2String(Reader source) throws IOException {
            BufferedReader bufferedReader = new BufferedReader(source);
            StringBuffer result = new StringBuffer();
            String buffer = null;
            while ((buffer = bufferedReader.readLine()) != null) {
                result.append(buffer + "\n");
            }
            return result.toString();
        }

        /**
         *
         * @param source
         *            file path
         * @return
         * @throws IOException
         */
        private Reader getReader(String source) throws IOException {
            return source == "" ? null : new BufferedReader(new InputStreamReader(
                    getInputStream(source)));
        }

        /**
         * get script file content string
         *
         * @param source
         * @return
         * @throws IOException
         */
        private String getSourceFileContentReader(String source) throws IOException {
            return source == "" ? "" : reader2String(getReader(source));
        }

        /**
         * get input stream
         *
         * @param source
         *            file path
         * @return
         * @throws IOException
         */
        private InputStream getInputStream(String source) throws IOException {
            return source == "" ? null : new FileInputStream(new File(source));
        }

        /**
         * Replace all occurrences of a substring within a string with another
         * string.
         *
         * @param inString
         *            String to examine
         * @param oldPattern
         *            String to replace
         * @param newPattern
         *            String to insert
         * @return a String with the replacements
         */
        private String replace(String inString, String oldPattern, String newPattern) {
            if (!hasLength(inString) || !hasLength(oldPattern)
                    || newPattern == null) {
                return inString;
            }
            StringBuilder sb = new StringBuilder();
            int pos = 0;
            int index = inString.indexOf(oldPattern);
            int patLen = oldPattern.length();
            while (index >= 0) {
                sb.append(inString.substring(pos, index));
                sb.append(newPattern);
                pos = index + patLen;
                index = inString.indexOf(oldPattern, pos);
            }
            sb.append(inString.substring(pos));
            return sb.toString();
        }

        private boolean hasLength(String str) {
            return hasLength((CharSequence) str);
        }

        private boolean hasLength(CharSequence str) {
            return (str != null && str.length() > 0);
        }
    }

發佈留言取消回覆發佈留言必須填寫的電子郵件地址不會公開。

必填欄位標示為*留言*顯示名稱* 電子郵件地址* 個人網站網址 在瀏覽器中儲存顯示名稱、電子郵件地址及個人網站網址,以供下次發佈留言時使用。

文章導覽 上一篇文章上一篇文章:java定時操作之Timer和TimerTas–JAVA編程語言程序開發技術文章下一篇文章下一篇文章:Java調用WebService服務方法總結–JAVA編程語言程序開發技術文章 搜尋搜尋Android Android學習 android開發 IOS JavaScript MySQL 事件 代碼 函數 功能 加載 動畫 區別 基礎 字符串 學習筆記 安卓 安卓軟體開發 實例 常用 手機 技術文章 控件 操作 教學 教程 數據 數據庫 數組 文件 方法 框架 模式 機制 源碼 示例 程序開發 程式設計 簡單 編程語言 自定義 解決辦法 詳解 語句 錯誤虛擬主機推薦! 虛擬主機推薦! 設計模式學習之結構型–享元模式(FlyweightPattern)實例講解 設計模式學習之結構型–代理模式(ProxyPattern)實例講解 設計模式學習之代理模式–動態代理機制實例講解 Java工程師進階至架構師所需瞭解的基礎知識總結 Java集合類(容器類)、Map集合總結



請為這篇文章評分?