PowerShell scripts for creating and reading test files with the standard Unicode character encoding schemes and default encodings.

文章推薦指數: 80 %
投票人數:10人

WE CAN'T BE SURE THAT THE ENCLOSING FILE WILL HAVE a UTF-8 BOM. # E.G., WHEN DOWNLOADED FROM A Gist (GitHub). # POWERSHELL ITSELF DEFAULTS TO "ANSI" ... Skiptocontent Allgists BacktoGitHub Signin Signup Sign in Sign up {{message}} Instantlysharecode,notes,andsnippets. mklement0/New-EncodingTestFiles.ps1 LastactiveAug16,2022 Star 3 Fork 2 Star Code Revisions 14 Stars 3 Forks 2 Embed Whatwouldyouliketodo? Embed Embedthisgistinyourwebsite. Share Copysharablelinkforthisgist. Clonevia HTTPS ClonewithGitorcheckoutwithSVNusingtherepository’swebaddress. LearnmoreaboutcloneURLs DownloadZIP PowerShellscriptsforcreatingandreadingtestfileswiththestandardUnicodecharacterencodingschemesanddefaultencodings. Raw New-EncodingTestFiles.ps1 ThisfilecontainsbidirectionalUnicodetextthatmaybeinterpretedorcompileddifferentlythanwhatappearsbelow.Toreview,openthefileinaneditorthatrevealshiddenUnicodecharacters. LearnmoreaboutbidirectionalUnicodecharacters Showhiddencharacters param( #IMPORTANT:AVOIDNON-ASCIISTRINGLITERALS,BECAUSE #WECAN'TBESURETHATTHEENCLOSINGFILEWILLHAVEaUTF-8BOM #E.G.,WHENDOWNLOADEDFROMAGist(GitHub). #POWERSHELLITSELFDEFAULTSTO"ANSI"ENCODINGWHENREADINGA #FILEWITHOUTBOM. #THEFOLLOWINGISTHEEQUIVALENTOF'oö'(lowercase'o' #(LATINSMALLLETTERO), #lowercaseumlaut-o(LATINSMALLLETTEROWITHDIAERESIS) [string]$Text=[char[]](0x6f,0xf6)-join'', [Alias('Path')] [string]$LiteralPath='./enc-test' ) $ErrorActionPreference='Stop' $VerbosePreference='Continue' #Instantiateencodersandstoretheminahashtablewhosekeyreflects #theencodingscheme;thekeysareusedasthefilenames. #NOTE:UTF-7isnotincluded,becausetheencoderdoesn'toffercreatingaBOM #andPowerShelldoesn'texpectonewhenreadingwith-EncodingUTF7 #(itreturnsU+FEFFasaliteralchar.) #"utf7"=New-ObjectSystem.Text.UTF7Encoding$False #"utf7o"=New-ObjectSystem.Text.UTF7Encoding$True#allowoptionaldirectchars. 
#Keylegend: #B...*with*BOM #N...*no*BOM #le,be...littl-endian,big-endian $htEncs=[ordered]@{ "utf8B"=New-ObjectSystem.Text.UTF8Encoding$True#BOM-or-not "utf8N"=New-ObjectSystem.Text.UTF8Encoding$False#BOM-or-not "utf16leB"=New-ObjectSystem.Text.UnicodeEncoding$False,$True#big-or-little-endian,BOM-or-not "utf16leN"=New-ObjectSystem.Text.UnicodeEncoding$False,$False#big-or-little-endian,BOM-or-not "utf16beB"=New-ObjectSystem.Text.UnicodeEncoding$True,$True#big-or-little-endian,BOM-or-not "utf16beN"=New-ObjectSystem.Text.UnicodeEncoding$True,$False#big-or-little-endian,BOM-or-not "utf32leB"=New-ObjectSystem.Text.UTF32Encoding$False,$True#big-or-little-endian,BOM-or-not "utf32leN"=New-ObjectSystem.Text.UTF32Encoding$False,$False#big-or-little-endian,BOM-or-not "utf32beB"=New-ObjectSystem.Text.UTF32Encoding$True,$True#big-or-little-endian,BOM-or-not "utf32beN"=New-ObjectSystem.Text.UTF32Encoding$True,$False#big-or-little-endian,BOM-or-not "default"=[System.Text.Encoding]::Default "sc-default"=$null#UseSet-Content-notethat[System.Text.Encoding]::DefaultisnotthesameinPSCore. "of-default"=$null#UseOut-File(whichusesUTF-16LE) } #Determinefilecontentsforthevariousfiletypes,byfilenameextension. $htTexts=[ordered]@{ '.txt'=$Text #Note:Import-CSVrequiresfield-internal"chars.tobeescapedas"",inlinewithRFC4180 '.csv'=@" Value "$($Text-replace'"','""')" "@ '.clixml'=@" "@ '.psd1'=@" @{ Value='$($Text-replace"'","''")' } "@ } #Determineoutputpathandcreateoutputdir.ondemand if(-not(Test-Path$LiteralPath)){#outputdir.doesn'texist,createit Write-Host-ForegroundColorYellow"OKtocreateoutputdir.?" if(-not(New-Item-ItemTypeDirectory$LiteralPath-Confirm:$true)){exit1} } #Makesurethatthe.NETframeworkusesthesameworkingdir.asPS. [io.directory]::SetCurrentDirectory($PWD.ProviderPath) #Createthefiles. 
foreach($extin$htTexts.Keys){ foreach($namein$htEncs.Keys){ $enc=$htEncs.$name $txt=$htTexts.$ext $fpath=Join-Path$LiteralPath($name+$ext) write-verbose"Writingto:$fpath" if($name-eq'sc-default'){#UseSet-Content Set-Content-Value$txt-NoNewline-LiteralPath$fpath }elseif($name-eq'of-default'){#UseOut-File Out-File-InputObject$txt-NoNewline-LiteralPath$fpath }else{ [io.file]::WriteAllText($fpath,$txt,$enc) } } } Raw Read-EncodingTestFiles.ps1 ThisfilecontainsbidirectionalUnicodetextthatmaybeinterpretedorcompileddifferentlythanwhatappearsbelow.Toreview,openthefileinaneditorthatrevealshiddenUnicodecharacters. LearnmoreaboutbidirectionalUnicodecharacters Showhiddencharacters [CmdletBinding()] param( #IMPORTANT:AVOIDNON-ASCIISTRINGLITERALS,BECAUSE #WECAN'TBESURETHATTHEENCLOSINGFILEWILLHAVEaUTF-8BOM #E.G.,WHENDOWNLOADEDFROMAGist(GitHub). #POWERSHELLITSELFDEFAULTSTO"ANSI"ENCODINGWHENREADINGA #FILEWITHOUTBOM. #THEFOLLOWINGISTHEEQUIVALENTOF'oö'(lowercase'o' #(LATINSMALLLETTERO), #lowercaseumlaut-o(LATINSMALLLETTEROWITHDIAERESIS) [string]$ReferenceText=[char[]](0x6f,0xf6)-join'',#MatchesNew-EncodingTestFiles'sdefault #Thepath [string]$LiteralPath='./enc-test'#MatchesNew-EncodingTestFiles'sdefault ) #ENSURETHATTHISFILEISUTF-8-ENCODED*WITH*ABOM-otherwisePowerShell #willnotinterpretitcorrectly. functionGet-CodePointList([string]$Text){ '0x6f0xf6' .EXAMPLE >Get-CodePointList'oö' 0x6f0xf6 #> switch($Text){ $null{'(null)';break} ''{'(empty)';break} Default{ [string]([int[]]$Text.ToCharArray()|ForEach-Object{'0x{0:x2}'-f$_}) } } } #Note:Thisisalsonecessarytomakethetry/catchhandlerswork. $ErrorActionPreference='Stop' #Thefilenameextensionandwhatcmdlet(s)toloadthemwith. 
#NotethatImport-PowerShellDataFileandImport-Clixmldonotsupportthe #-Encodingparameter $htExts=[ordered]@{ '.txt'=@{cmdletName='Get-Content'}, @{cmdletName='Select-String';fixedParams=@{Pattern=$ReferenceText;SimpleMatch=$true}} '.csv'=@{cmdletName='Import-Csv'} '.psd1'=@{cmdletName='Import-PowerShellDataFile'} '.clixml'=@{cmdletName='Import-Clixml'} } #Mapthefilenamerootstothecorresponding-Encodingparametervalues. $htEncodingNames=@{ utf8='utf8' utf16le='Unicode' utf16be='BigEndianUnicode' utf32le='UTF32' utf32be='BigEndianUTF32' 'default'='Default' 'sc-default'='Default' 'of-default'='Unicode' } #Loopoverallfiletypes foreach($extin$htExts.Keys){ $cmdDefs=$htExts.$ext $files=Get-Item-Path"./enc-test/*$ext" Write-Verbose"=============$ext" #Readwithandwithout-Encodingparameter. foreach($cmdDefin$cmdDefs){ $cmd=$cmdDef.cmdletName $htParams=$cmdDef.fixedParams if(-not$htParams){$htParams=@{}} for($pass=1;$pass-le2;++$pass){ #2ndpass:Seeifthecmdletevensupports-Encodingandskip,if #not. if($pass-eq2){ if(-not(Get-Command$cmd).Parameters.ContainsKey('Encoding')){ Write-Verbose"==($cmddoesn'thavean-Encodingparameter)" break } } Write-Verbose"==Using$cmd$(if($pass-eq1){'WITHOUT'}else{'WITHappropriate'})-Encodingparameter:" $htEncodingParamIfAny=@{} foreach($filein$files){ $encName='(default)' if($pass-eq2){ $encName=$htEncodingNames.$($file.BaseName-replace'[NB]$') $htParams.Encoding=$encName#Set-Encodingargument } $exceptionText='' try{ $content=&$cmd$file.FullName@htParams } catch{ $exceptionText="$_" if($_.Exception.ParameterName-eq'Encoding'){ $result="NOTSUPPORTED:$encName" }else{ $result="ERROR" } } if($exceptionText){#readingfailed Write-Verbose"${encName},${cmd}:exceptionoccurred:$exceptionText" }else{#readingsucceeded,butencodingmaynotbecorrect #Extractthestringvaluetotestfromthe*object*thatsomeofthe #cmdletsreturn. 
if($content-isnot[string]){ if($cmd-eq'Select-String'){#Thematchedlineisinthe.Lineproperty $content=$content.Line }else{#allothershavea.Valueproperty $content=$content.Value } } Write-Verbose"${encName},${cmd}:value:[$content];bytes:$(Get-CodePointList$content)" if($null-eq$content){#valuecouldnotberead $result="NOTHINGREAD" }else{#makesurethatwasreadmatchesthereferencetextcodepointbycodepoint $codePoints=[int[]]$content.ToCharArray() $result=$codePoints.Count-eq2-and$content-eq$ReferenceText $result=('INCORRECTLYDECODED','ok')[$result] } } [pscustomobject]@{Cmdlet=$cmd;Method=('-Encoding','Auto')[$pass-eq1];FileName=$file.Name;Result=$result}#|Out-Default } }#foreach$pass }#foreach$cmd }#foreach$ext Signupforfree tojointhisconversationonGitHub. Alreadyhaveanaccount? Signintocomment Youcan’tperformthatactionatthistime. Yousignedinwithanothertaborwindow.Reloadtorefreshyoursession. Yousignedoutinanothertaborwindow.Reloadtorefreshyoursession.



請為這篇文章評分?