Java - How to add and remove BOM from UTF-8 file

文章推薦指數: 80 %
投票人數:10人

The Unicode character \ufeff is encoded in UTF-8 as the bytes 0xEF, 0xBB, 0xBF; read this. 1.1 The example below writes a BOM to a UTF-8 file, /home/mkyong/file.txt. JavaIOTutorialHomeFileCreate&WriteFileReadFileAppendFileDeleteFileCopyFileRenameMoveFileFindFilePathFileTransferFileExistsFileSizeDirectoryCreateDirectoryDeleteDirectoryCopyDirectorySizeDirectoryWalk(Files.walk)TempFileCreateTempFileWriteTempFileDeleteTempFileFilePathSerializationandDeserializationHowTo-WorkingDirectoryHowTo-ReadFilefromResourcesHowTo-GetJARpathHowTo-ZipFileHowTo-UnZipFileHowTo-CompressGzipfileHowTo-DecompressGzipfileHowTo-TarGzip,tar.gzHowTo-FileSeparatorHowTo-CountLinesHowTo-NewLineHowTo-GetFileExtensionHowTo-GetFileCreationDateHowTo-GetFileLastModifiedHowTo-UpdateLastModifiedHowTo-FormatFileTimeHowTo-MoveFileToDirectoryHowTo-RemoteShellScriptHowTo-UTF-8ReadHowTo-UTF-8WriteHowTo-FiletoPathHowTo-Filetobyte[]HowTo-FiletoHexHowTo-FiletoStringHowTo-StringtoFileHowTo-byte[]toStringHowTo-byte[]toFileHowTo-byte[]toObjectHowTo-ImageReadWriteHowTo-ImageResizeHowTo-ReadCSVFileHowTo-WriteCSVFileHowTo-FindFileByExtensionHowTo-InputStreamtoBufferedReaderHowTo-InputStreamtoFileHowTo-StringtoInputStreamHowTo-FileInputStream Java – How to add and remove BOM from UTF-8 file By mkyong | Last updated: April 14, 2021 Viewed: 29,003 (+471 pv/w) Tags: bom | ByteBuffer | hex | java.io | read bytes | read file | utf-8 | write bytes | write file This article shows you how to add, check and remove the byte order mark (BOM) from a UTF-8 file. The UTF-8 representation of the BOM is the byte sequence 0xEF, 0xBB, 0xBF (hexadecimal), at the beginning of the file. 1. Add BOM to a UTF-8 file 2. Check if a file contains UTF-8 BOM 3. Remove BOM from a UTF-8 file 4. Copy a file and add BOM 5. Download Source Code 6. References Further Reading Read more about BOM and UTF-8 P.S. The BOM examples below only work for UTF-8 files. 1. Add BOM to a UTF-8 file To add a BOM to a UTF-8 file, we can directly write the Unicode character \ufeff or the three bytes 0xEF, 0xBB, 0xBF at the beginning of the UTF-8 file. Note The Unicode character \ufeff is encoded in UTF-8 as the bytes 0xEF, 0xBB, 0xBF; read this. 1.1 The example below writes a BOM to a UTF-8 file, /home/mkyong/file.txt. AddBomToUtf8File.jav
a packagecom.mkyong.io.howto; importjava.io.BufferedWriter; importjava.io.IOException; importjava.nio.file.Files; importjava.nio.file.Path; importjava.nio.file.Paths; publicclassAddBomToUtf8File{ publicstaticvoidmain(String[]args)throwsIOException{ Pathpath=Paths.get("/home/mkyong/file.txt"); writeBomFile(path,"mkyong"); } privatestaticvoidwriteBomFile(Pathpath,Stringcontent){ //Java8defaultUTF-8 try(BufferedWriterbw=Files.newBufferedWriter(path)){ bw.write("\ufeff"); bw.write(content); bw.newLine(); bw.write(content); }catch(IOExceptione){ e.printStackTrace(); } } } OutputTerminal $hexdump-C/home/mkyong/file.txt 00000000efbbbf6d6b796f6e670a6d6b796f6e67|...mkyong.mkyong| 00000010 $file/home/mkyong/file.txt file.txt:UTF-8Unicode(withBOM)text $cat/home/mkyong/file.txt mkyong mkyong 1.2BeforeJava8,BufferedWriterandOutputStreamWriterexamplesofwritingBOMtoaUTF-8file. privatestaticvoidwriteBomFile(Pathpath,Stringcontent){ try(BufferedWriterbw=newBufferedWriter( newOutputStreamWriter( newFileOutputStream(path.toFile()) ,StandardCharsets.UTF_8))){ bw.write("\ufeff"); bw.write(content); bw.newLine(); bw.write(content); }catch(IOExceptione){ e.printStackTrace(); } } 1.3PrintWriterandOutputStreamWriterexampletowriteBOMtoaUTF-8file.The0xfeffisthebyteordermark(BOM)codepoint. privatestaticvoidwriteBomFile(Pathpath,Stringcontent){ try(PrintWriterpw=newPrintWriter( newOutputStreamWriter( newFileOutputStream(path.toFile()),StandardCharsets.UTF_8))){ //pw.write("\ufeff"); pw.write(0xfeff);//alternative,codepoint pw.write(content); pw.write(System.lineSeparator()); pw.write(content); }catch(IOExceptione){ e.printStackTrace(); } } 1.4Alternatively,wecanwritetheBOMbytesequence0xEF,0xBB,and0xBFdirectlytoafile. 
privatestaticvoidwriteBomFile4(Pathpath,Stringcontent){ try(FileOutputStreamfos=newFileOutputStream(path.toFile())){ byte[]BOM={(byte)0xEF,(byte)0xBB,(byte)0xBF}; fos.write(BOM); fos.write(content.getBytes(StandardCharsets.UTF_8)); fos.write(System.lineSeparator().getBytes(StandardCharsets.UTF_8)); fos.write(content.getBytes(StandardCharsets.UTF_8)); }catch(IOExceptione){ e.printStackTrace(); } } 2.CheckifafilecontainsUTF-8BOMThebelowexamplereadthefirst3bytesfromafileandcheckifitcontainsthe0xEF,0xBB,0xBFbytesequence.CheckBom.java packagecom.mkyong.io.howto; importorg.apache.commons.codec.binary.Hex; importjava.io.FileInputStream; importjava.io.IOException; importjava.io.InputStream; importjava.nio.file.Files; importjava.nio.file.Path; importjava.nio.file.Paths; publicclassCheckBom{ publicstaticvoidmain(String[]args)throwsIOException{ Pathpath=Paths.get("/home/mkyong/file.txt"); if(isContainBOM(path)){ System.out.println("FoundBOM!"); }else{ System.out.println("NoBOM."); } } privatestaticbooleanisContainBOM(Pathpath)throwsIOException{ if(Files.notExists(path)){ thrownewIllegalArgumentException("Path:"+path+"doesnotexists!"); } booleanresult=false; byte[]bom=newbyte[3]; try(InputStreamis=newFileInputStream(path.toFile())){ //readfirst3bytesofafile. is.read(bom); //BOMencodedasefbbbf Stringcontent=newString(Hex.encodeHex(bom)); if("efbbbf".equalsIgnoreCase(content)){ result=true; } } returnresult; } } OutputTerminal FoundBOM! 
Theimportorg.apache.commons.codec.binary.Hex;isinthebelowcommons-codeclibrary.Or,wecanuseoneofthesemethodstoconvertbytestohex.pom.xml commons-codec commons-codec 1.14 3.RemoveBOMfromaUTF-8fileThebelowexampleByteBuffertoremoveBOMfromaUTF-8file.P.SSomeXML,JSON,CSVparsersmayfailtoparseorprocessthefileifitcontainsBOMintheUTF-8file;itiscommontoremoveorskiptheBOMbeforeparsingthefile.RemoveBomFromUtf8File.java packagecom.mkyong.io.howto; importorg.apache.commons.codec.binary.Hex; importjava.io.BufferedWriter; importjava.io.FileInputStream; importjava.io.IOException; importjava.io.InputStream; importjava.nio.ByteBuffer; importjava.nio.file.Files; importjava.nio.file.Path; importjava.nio.file.Paths; publicclassRemoveBomFromUtf8File{ publicstaticvoidmain(String[]args)throwsIOException{ Pathpath=Paths.get("/home/mkyong/file.txt"); writeBomFile(path,"mkyong"); removeBom(path); } privatestaticvoidwriteBomFile(Pathpath,Stringcontent){ //Java8defaultUTF-8 try(BufferedWriterbw=Files.newBufferedWriter(path)){ bw.write("\ufeff"); bw.write(content); bw.newLine(); bw.write(content); }catch(IOExceptione){ e.printStackTrace(); } } privatestaticbooleanisContainBOM(Pathpath)throwsIOException{ if(Files.notExists(path)){ thrownewIllegalArgumentException("Path:"+path+"doesnotexists!"); } booleanresult=false; byte[]bom=newbyte[3]; try(InputStreamis=newFileInputStream(path.toFile())){ //read3bytesofafile. 
is.read(bom); //BOMencodedasefbbbf Stringcontent=newString(Hex.encodeHex(bom)); if("efbbbf".equalsIgnoreCase(content)){ result=true; } } returnresult; } privatestaticvoidremoveBom(Pathpath)throwsIOException{ if(isContainBOM(path)){ byte[]bytes=Files.readAllBytes(path); ByteBufferbb=ByteBuffer.wrap(bytes); System.out.println("FoundBOM!"); byte[]bom=newbyte[3]; //getthefirst3bytes bb.get(bom,0,bom.length); //remaining byte[]contentAfterFirst3Bytes=newbyte[bytes.length-3]; bb.get(contentAfterFirst3Bytes,0,contentAfterFirst3Bytes.length); System.out.println("Removethefirst3bytes,andoverwritethefile!"); //overridethesamepath Files.write(path,contentAfterFirst3Bytes); }else{ System.out.println("Thisfiledoesn'tcontainsUTF-8BOM!"); } } } OutputTerminal FoundBOM! Removethefirst3bytes,andoverwritethefile! 4.CopyafileandaddBOMThebelowexamplecopyofafileandaddaBOMtothetargetfile.CopyAndAddBomToXmlFile.java packagecom.mkyong.xml.sax; importjava.io.FileOutputStream; importjava.io.IOException; importjava.nio.file.Files; importjava.nio.file.Path; importjava.nio.file.Paths; publicclassCopyAndAddBomToXmlFile{ publicstaticvoidmain(String[]args){ Pathsrc=Paths.get("src/main/resources/staff.xml"); Pathdest=Paths.get("src/main/resources/staff-bom.xml"); writeBomFile(src,dest); } privatestaticvoidwriteBomFile(Pathsrc,Pathdest){ try(FileOutputStreamfos=newFileOutputStream(dest.toFile())){ byte[]BOM={(byte)0xEF,(byte)0xBB,(byte)0xBF}; //addBOM fos.write(BOM); //BOM+srctofos Files.copy(src,fos); }catch(IOExceptione){ e.printStackTrace(); } } } 
5. Download Source Code $ git clone https://github.com/mkyong/core-java $ cd java-io 6. References Wikipedia – Byte order mark Stackoverflow – What’s the difference between UTF-8 and UTF-8 without BOM? Java – Create and write a file How to write to file in Java – BufferedWriter SAX Error – Content is not allowed in prolog Java – How to join and split byte arrays, bytes Java – How to convert byte arrays to Hex BOM : Java Glossary mkyong Founder of Mkyong.com, love Java and open source stuff. Follow him on Twitter. If you like my tutorials, consider making a donation to these charities. Comments Label Name* Email* Label Name* Email* 3 Comments Most Voted Newest Oldest Inline Feedbacks View all comments Mesut 23 minutes ago Thanks, very useful information 0 Reply Eric Malalel 1 month ago was looking for a way to detect and remove bom before using csv parser, your solution works greet, thanks! 0 Reply Stanislav Tanev 1 year ago Great Article! Really appreciate the information! 0 Reply



請為這篇文章評分?