master - GitHub

2025-01-27

文章推薦指數： 80 %

投票人數：10人

codecs -- Python Codec Registry, API and helpers. ... __name__, self.name, id(self)). class Codec ... Defines the interface for stateless encoders/decoders. Skiptocontent {{message}} enthought / Python-2.7.3 Public Notifications Fork 9 Star 0 Code Issues 8 Pullrequests 1 Actions Projects 0 Wiki Security Insights More Code Issues Pullrequests Actions Projects Wiki Security Insights Permalink master Branches Tags Couldnotloadbranches Nothingtoshow {{refName}} default Couldnotloadtags Nothingtoshow {{refName}} default Python-2.7.3/Lib/codecs.py / Jumpto CodecInfo Class __new__ Function __repr__ Function Codec Class encode Function decode Function IncrementalEncoder Class __init__ Function encode Function reset Function getstate Function setstate Function BufferedIncrementalEncoder Class __init__ Function _buffer_encode Function encode Function reset Function getstate Function setstate Function IncrementalDecoder Class __init__ Function decode Function reset Function getstate Function setstate Function BufferedIncrementalDecoder Class __init__ Function _buffer_decode Function decode Function reset Function getstate Function setstate Function StreamWriter Class __init__ Function write Function writelines Function reset Function seek Function __getattr__ Function __enter__ Function __exit__ Function StreamReader Class __init__ Function decode Function read Function readline Function readlines Function reset Function seek Function next Function __iter__ Function __getattr__ Function __enter__ Function __exit__ Function StreamReaderWriter Class __init__ Function read Function readline Function readlines Function next Function __iter__ Function write Function writelines Function reset Function seek Function __getattr__ Function __enter__ Function __exit__ Function StreamRecoder Class __init__ Function read Function readline Function readlines Function next Function __iter__ Function write Function writelines Function reset Function __getattr__ Function __enter__ Function __exit__ Function open Function EncodedFile Function getencoder Function getdecoder Function getincrementalencoder Function getincrementaldecoder Function getreader Function getwriter Function iterencode Function iterdecode Function make_identity_dict Function make_encoding_map Function Gotofile Gotofile T Gotoline L Gotodefinition R Copypath Copypermalink Thiscommitdoesnotbelongtoanybranchonthisrepository,andmaybelongtoaforkoutsideoftherepository. Cannotretrievecontributorsatthistime 1098lines(834sloc) 34.4KB Raw Blame Editthisfile E OpeninGitHubDesktop OpenwithDesktop Viewraw Viewblame ThisfilecontainsbidirectionalUnicodetextthatmaybeinterpretedorcompileddifferentlythanwhatappearsbelow.Toreview,openthefileinaneditorthatrevealshiddenUnicodecharacters. LearnmoreaboutbidirectionalUnicodecharacters Showhiddencharacters """codecs--PythonCodecRegistry,APIandhelpers. WrittenbyMarc-AndreLemburg([email protected]). (c)CopyrightCNRI,AllRightsReserved.NOWARRANTY. """#" import__builtin__,sys ###Registryandbuiltinstatelesscodecfunctions try: from_codecsimport* exceptImportError,why: raiseSystemError('Failedtoloadthebuiltincodecs:%s'%why) __all__=["register","lookup","open","EncodedFile","BOM","BOM_BE", "BOM_LE","BOM32_BE","BOM32_LE","BOM64_BE","BOM64_LE", "BOM_UTF8","BOM_UTF16","BOM_UTF16_LE","BOM_UTF16_BE", "BOM_UTF32","BOM_UTF32_LE","BOM_UTF32_BE", "strict_errors","ignore_errors","replace_errors", "xmlcharrefreplace_errors", "register_error","lookup_error"] ###Constants # #ByteOrderMark(BOM=ZEROWIDTHNO-BREAKSPACE=U+FEFF) #anditspossiblebytestringvalues #forUTF8/UTF16/UTF32outputandlittle/bigendianmachines # #UTF-8 BOM_UTF8='\xef\xbb\xbf' #UTF-16,littleendian BOM_LE=BOM_UTF16_LE='\xff\xfe' #UTF-16,bigendian BOM_BE=BOM_UTF16_BE='\xfe\xff' #UTF-32,littleendian BOM_UTF32_LE='\xff\xfe\x00\x00' #UTF-32,bigendian BOM_UTF32_BE='\x00\x00\xfe\xff' ifsys.byteorder=='little': #UTF-16,nativeendianness BOM=BOM_UTF16=BOM_UTF16_LE #UTF-32,nativeendianness BOM_UTF32=BOM_UTF32_LE else: #UTF-16,nativeendianness BOM=BOM_UTF16=BOM_UTF16_BE #UTF-32,nativeendianness BOM_UTF32=BOM_UTF32_BE #Oldbrokennames(don'tuseinnewcode) BOM32_LE=BOM_UTF16_LE BOM32_BE=BOM_UTF16_BE BOM64_LE=BOM_UTF32_LE BOM64_BE=BOM_UTF32_BE ###Codecbaseclasses(definingtheAPI) classCodecInfo(tuple): def__new__(cls,encode,decode,streamreader=None,streamwriter=None, incrementalencoder=None,incrementaldecoder=None,name=None): self=tuple.__new__(cls,(encode,decode,streamreader,streamwriter)) self.name=name self.encode=encode self.decode=decode self.incrementalencoder=incrementalencoder self.incrementaldecoder=incrementaldecoder self.streamwriter=streamwriter self.streamreader=streamreader returnself def__repr__(self): return""%(self.__class__.__module__,self.__class__.__name__,self.name,id(self)) classCodec: """Definestheinterfaceforstatelessencoders/decoders. The.encode()/.decode()methodsmayusedifferenterror handlingschemesbyprovidingtheerrorsargument.These stringvaluesarepredefined: 'strict'-raiseaValueErrorerror(orasubclass) 'ignore'-ignorethecharacterandcontinuewiththenext 'replace'-replacewithasuitablereplacementcharacter; PythonwillusetheofficialU+FFFDREPLACEMENT CHARACTERforthebuiltinUnicodecodecson decodingand'?'onencoding. 'xmlcharrefreplace'-ReplacewiththeappropriateXML characterreference(onlyforencoding). 'backslashreplace'-Replacewithbackslashedescapesequences (onlyforencoding). Thesetofallowedvaluescanbeextendedviaregister_error. """ defencode(self,input,errors='strict'): """Encodestheobjectinputandreturnsatuple(output object,lengthconsumed). errorsdefinestheerrorhandlingtoapply.Itdefaultsto 'strict'handling. ThemethodmaynotstorestateintheCodecinstance.Use StreamCodecforcodecswhichhavetokeepstateinorderto makeencoding/decodingefficient. Theencodermustbeabletohandlezerolengthinputand returnanemptyobjectoftheoutputobjecttypeinthis situation. """ raiseNotImplementedError defdecode(self,input,errors='strict'): """Decodestheobjectinputandreturnsatuple(output object,lengthconsumed). inputmustbeanobjectwhichprovidesthebf_getreadbuf bufferslot.Pythonstrings,bufferobjectsandmemory mappedfilesareexamplesofobjectsprovidingthisslot. errorsdefinestheerrorhandlingtoapply.Itdefaultsto 'strict'handling. ThemethodmaynotstorestateintheCodecinstance.Use StreamCodecforcodecswhichhavetokeepstateinorderto makeencoding/decodingefficient. Thedecodermustbeabletohandlezerolengthinputand returnanemptyobjectoftheoutputobjecttypeinthis situation. """ raiseNotImplementedError classIncrementalEncoder(object): """ AnIncrementalEncoderencodesaninputinmultiplesteps.Theinputcanbe passedpiecebypiecetotheencode()method.TheIncrementalEncoderremembers thestateoftheEncodingprocessbetweencallstoencode(). """ def__init__(self,errors='strict'): """ CreatesanIncrementalEncoderinstance. TheIncrementalEncodermayusedifferenterrorhandlingschemesby providingtheerrorskeywordargument.Seethemoduledocstring foralistofpossiblevalues. """ self.errors=errors self.buffer="" defencode(self,input,final=False): """ Encodesinputandreturnstheresultingobject. """ raiseNotImplementedError defreset(self): """ Resetstheencodertotheinitialstate. """ defgetstate(self): """ Returnthecurrentstateoftheencoder. """ return0 defsetstate(self,state): """ Setthecurrentstateoftheencoder.statemusthavebeen returnedbygetstate(). """ classBufferedIncrementalEncoder(IncrementalEncoder): """ ThissubclassofIncrementalEncodercanbeusedasthebaseclassforan incrementalencoderiftheencodermustkeepsomeoftheoutputina bufferbetweencallstoencode(). """ def__init__(self,errors='strict'): IncrementalEncoder.__init__(self,errors) self.buffer=""#unencodedinputthatiskeptbetweencallstoencode() def_buffer_encode(self,input,errors,final): #Overwritethismethodinsubclasses:Itmustencodeinput #andreturnan(output,lengthconsumed)tuple raiseNotImplementedError defencode(self,input,final=False): #encodeinput(takingthebufferintoaccount) data=self.buffer+input (result,consumed)=self._buffer_encode(data,self.errors,final) #keepunencodedinputuntilthenextcall self.buffer=data[consumed:] returnresult defreset(self): IncrementalEncoder.reset(self) self.buffer="" defgetstate(self): returnself.bufferor0 defsetstate(self,state): self.buffer=stateor"" classIncrementalDecoder(object): """ AnIncrementalDecoderdecodesaninputinmultiplesteps.Theinputcanbe passedpiecebypiecetothedecode()method.TheIncrementalDecoder remembersthestateofthedecodingprocessbetweencallstodecode(). """ def__init__(self,errors='strict'): """ CreatesaIncrementalDecoderinstance. TheIncrementalDecodermayusedifferenterrorhandlingschemesby providingtheerrorskeywordargument.Seethemoduledocstring foralistofpossiblevalues. """ self.errors=errors defdecode(self,input,final=False): """ Decodesinputandreturnstheresultingobject. """ raiseNotImplementedError defreset(self): """ Resetsthedecodertotheinitialstate. """ defgetstate(self): """ Returnthecurrentstateofthedecoder. Thismustbea(buffered_input,additional_state_info)tuple. buffered_inputmustbeabytesobjectcontainingbytesthat werepassedtodecode()thathavenotyetbeenconverted. additional_state_infomustbeanon-negativeinteger representingthestateofthedecoderWITHOUTyethaving processedthecontentsofbuffered_input.Intheinitialstate andafterreset(),getstate()mustreturn(b"",0). """ return(b"",0) defsetstate(self,state): """ Setthecurrentstateofthedecoder. statemusthavebeenreturnedbygetstate().Theeffectof setstate((b"",0))mustbeequivalenttoreset(). """ classBufferedIncrementalDecoder(IncrementalDecoder): """ ThissubclassofIncrementalDecodercanbeusedasthebaseclassforan incrementaldecoderifthedecodermustbeabletohandleincompletebyte sequences. """ def__init__(self,errors='strict'): IncrementalDecoder.__init__(self,errors) self.buffer=""#undecodedinputthatiskeptbetweencallstodecode() def_buffer_decode(self,input,errors,final): #Overwritethismethodinsubclasses:Itmustdecodeinput #andreturnan(output,lengthconsumed)tuple raiseNotImplementedError defdecode(self,input,final=False): #decodeinput(takingthebufferintoaccount) data=self.buffer+input (result,consumed)=self._buffer_decode(data,self.errors,final) #keepundecodedinputuntilthenextcall self.buffer=data[consumed:] returnresult defreset(self): IncrementalDecoder.reset(self) self.buffer="" defgetstate(self): #additionalstateinfoisalways0 return(self.buffer,0) defsetstate(self,state): #ignoreadditionalstateinfo self.buffer=state[0] # #TheStreamWriterandStreamReaderclassprovidegenericworking #interfaceswhichcanbeusedtoimplementnewencodingsubmodules #veryeasily.Seeencodings/utf_8.pyforanexampleonhowthisis #done. # classStreamWriter(Codec): def__init__(self,stream,errors='strict'): """CreatesaStreamWriterinstance. streammustbeafile-likeobjectopenforwriting (binary)data. TheStreamWritermayusedifferenterrorhandling schemesbyprovidingtheerrorskeywordargument.These parametersarepredefined: 'strict'-raiseaValueError(orasubclass) 'ignore'-ignorethecharacterandcontinuewiththenext 'replace'-replacewithasuitablereplacementcharacter 'xmlcharrefreplace'-ReplacewiththeappropriateXML characterreference. 'backslashreplace'-Replacewithbackslashedescape sequences(onlyforencoding). Thesetofallowedparametervaluescanbeextendedvia register_error. """ self.stream=stream self.errors=errors defwrite(self,object): """Writestheobject'scontentsencodedtoself.stream. """ data,consumed=self.encode(object,self.errors) self.stream.write(data) defwritelines(self,list): """Writestheconcatenatedlistofstringstothestream using.write(). """ self.write(''.join(list)) defreset(self): """Flushesandresetsthecodecbuffersusedforkeepingstate. Callingthismethodshouldensurethatthedataonthe outputisputintoacleanstate,thatallowsappending ofnewfreshdatawithouthavingtorescanthewhole streamtorecoverstate. """ pass defseek(self,offset,whence=0): self.stream.seek(offset,whence) ifwhence==0andoffset==0: self.reset() def__getattr__(self,name, getattr=getattr): """Inheritallothermethodsfromtheunderlyingstream. """ returngetattr(self.stream,name) def__enter__(self): returnself def__exit__(self,type,value,tb): self.stream.close() ### classStreamReader(Codec): def__init__(self,stream,errors='strict'): """CreatesaStreamReaderinstance. streammustbeafile-likeobjectopenforreading (binary)data. TheStreamReadermayusedifferenterrorhandling schemesbyprovidingtheerrorskeywordargument.These parametersarepredefined: 'strict'-raiseaValueError(orasubclass) 'ignore'-ignorethecharacterandcontinuewiththenext 'replace'-replacewithasuitablereplacementcharacter; Thesetofallowedparametervaluescanbeextendedvia register_error. """ self.stream=stream self.errors=errors self.bytebuffer="" #Forstr->strdecodingthiswillstayastr #Forstr->unicodedecodingthefirstreadwillpromoteittounicode self.charbuffer="" self.linebuffer=None defdecode(self,input,errors='strict'): raiseNotImplementedError defread(self,size=-1,chars=-1,firstline=False): """Decodesdatafromthestreamself.streamandreturnsthe resultingobject. charsindicatesthenumberofcharacterstoreadfromthe stream.read()willneverreturnmorethanchars characters,butitmightreturnless,iftherearenotenough charactersavailable. sizeindicatestheapproximatemaximumnumberofbytesto readfromthestreamfordecodingpurposes.Thedecoder canmodifythissettingasappropriate.Thedefaultvalue -1indicatestoreadanddecodeasmuchaspossible.size isintendedtopreventhavingtodecodehugefilesinone step. Iffirstlineistrue,andaUnicodeDecodeErrorhappens afterthefirstlineterminatorintheinputonlythefirstline willbereturned,therestoftheinputwillbekeptuntilthe nextcalltoread(). Themethodshoulduseagreedyreadstrategymeaningthat itshouldreadasmuchdataasisallowedwithinthe definitionoftheencodingandthegivensize,e.g.if optionalencodingendingsorstatemarkersareavailable onthestream,theseshouldbereadtoo. """ #Ifwehavelinescached,firstmergethembackintocharacters ifself.linebuffer: self.charbuffer="".join(self.linebuffer) self.linebuffer=None #readuntilwegettherequirednumberofcharacters(ifavailable) whileTrue: #cantherequestcanbesatisfiedfromthecharacterbuffer? ifchars<0: ifsize<0: ifself.charbuffer: break eliflen(self.charbuffer)>=size: break else: iflen(self.charbuffer)>=chars: break #weneedmoredata ifsize<0: newdata=self.stream.read() else: newdata=self.stream.read(size) #decodebytes(thoseremainingfromthelastcallincluded) data=self.bytebuffer+newdata try: newchars,decodedbytes=self.decode(data,self.errors) exceptUnicodeDecodeError,exc: iffirstline: newchars,decodedbytes=self.decode(data[:exc.start],self.errors) lines=newchars.splitlines(True) iflen(lines)<=1: raise else: raise #keepundecodedbytesuntilthenextcall self.bytebuffer=data[decodedbytes:] #putnewcharactersinthecharacterbuffer self.charbuffer+=newchars #therewasnodataavailable ifnotnewdata: break ifchars<0: #Returneverythingwe'vegot result=self.charbuffer self.charbuffer="" else: #Returnthefirstcharscharacters result=self.charbuffer[:chars] self.charbuffer=self.charbuffer[chars:] returnresult defreadline(self,size=None,keepends=True): """Readonelinefromtheinputstreamandreturnthe decodeddata. size,ifgiven,ispassedassizeargumenttothe read()method. """ #Ifwehavelinescachedfromanearlierread,return #themunconditionally ifself.linebuffer: line=self.linebuffer[0] delself.linebuffer[0] iflen(self.linebuffer)==1: #reverttocharbuffermode;wemightneedmoredata #nexttime self.charbuffer=self.linebuffer[0] self.linebuffer=None ifnotkeepends: line=line.splitlines(False)[0] returnline readsize=sizeor72 line="" #Ifsizeisgiven,wecallread()onlyonce whileTrue: data=self.read(readsize,firstline=True) ifdata: #Ifwe'reata"\r"readoneextracharacter(whichmight #bea"\n")togetaproperlineending.Ifthestreamis #temporarilyexhaustedwereturnthewronglineending. ifdata.endswith("\r"): data+=self.read(size=1,chars=1) line+=data lines=line.splitlines(True) iflines: iflen(lines)>1: #Morethanonelineresult;thefirstlineisafullline #toreturn line=lines[0] dellines[0] iflen(lines)>1: #cachetheremaininglines lines[-1]+=self.charbuffer self.linebuffer=lines self.charbuffer=None else: #onlyoneremainingline,putitbackintocharbuffer self.charbuffer=lines[0]+self.charbuffer ifnotkeepends: line=line.splitlines(False)[0] break line0withend=lines[0] line0withoutend=lines[0].splitlines(False)[0] ifline0withend!=line0withoutend:#Wereallyhavealineend #Puttherestbacktogetherandkeepituntilthenextcall self.charbuffer="".join(lines[1:])+self.charbuffer ifkeepends: line=line0withend else: line=line0withoutend break #wedidn'tgetanythingorthiswasouronlytry ifnotdataorsizeisnotNone: iflineandnotkeepends: line=line.splitlines(False)[0] break ifreadsize<8000: readsize*=2 returnline defreadlines(self,sizehint=None,keepends=True): """Readalllinesavailableontheinputstream andreturnthemaslistoflines. Linebreaksareimplementedusingthecodec'sdecoder methodandareincludedinthelistentries. sizehint,ifgiven,isignoredsincethereisnoefficient waytofindingthetrueend-of-line. """ data=self.read() returndata.splitlines(keepends) defreset(self): """Resetsthecodecbuffersusedforkeepingstate. Notethatnostreamrepositioningshouldtakeplace. Thismethodisprimarilyintendedtobeabletorecover fromdecodingerrors. """ self.bytebuffer="" self.charbuffer=u"" self.linebuffer=None defseek(self,offset,whence=0): """Settheinputstream'scurrentposition. Resetsthecodecbuffersusedforkeepingstate. """ self.stream.seek(offset,whence) self.reset() defnext(self): """Returnthenextdecodedlinefromtheinputstream.""" line=self.readline() ifline: returnline raiseStopIteration def__iter__(self): returnself def__getattr__(self,name, getattr=getattr): """Inheritallothermethodsfromtheunderlyingstream. """ returngetattr(self.stream,name) def__enter__(self): returnself def__exit__(self,type,value,tb): self.stream.close() ### classStreamReaderWriter: """StreamReaderWriterinstancesallowwrappingstreamswhich workinbothreadandwritemodes. Thedesignissuchthatonecanusethefactoryfunctions returnedbythecodec.lookup()functiontoconstructthe instance. """ #Optionalattributessetbythefilewrappersbelow encoding='unknown' def__init__(self,stream,Reader,Writer,errors='strict'): """CreatesaStreamReaderWriterinstance. streammustbeaStream-likeobject. Reader,Writermustbefactoryfunctionsorclasses providingtheStreamReader,StreamWriterinterfaceresp. Errorhandlingisdoneinthesamewayasdefinedforthe StreamWriter/Readers. """ self.stream=stream self.reader=Reader(stream,errors) self.writer=Writer(stream,errors) self.errors=errors defread(self,size=-1): returnself.reader.read(size) defreadline(self,size=None): returnself.reader.readline(size) defreadlines(self,sizehint=None): returnself.reader.readlines(sizehint) defnext(self): """Returnthenextdecodedlinefromtheinputstream.""" returnself.reader.next() def__iter__(self): returnself defwrite(self,data): returnself.writer.write(data) defwritelines(self,list): returnself.writer.writelines(list) defreset(self): self.reader.reset() self.writer.reset() defseek(self,offset,whence=0): self.stream.seek(offset,whence) self.reader.reset() ifwhence==0andoffset==0: self.writer.reset() def__getattr__(self,name, getattr=getattr): """Inheritallothermethodsfromtheunderlyingstream. """ returngetattr(self.stream,name) #theseareneededtomake"withcodecs.open(...)"workproperly def__enter__(self): returnself def__exit__(self,type,value,tb): self.stream.close() ### classStreamRecoder: """StreamRecoderinstancesprovideafrontend-backend viewofencodingdata. TheyusethecompletesetofAPIsreturnedbythe codecs.lookup()functiontoimplementtheirtask. Datawrittentothestreamisfirstdecodedintoan intermediateformat(whichisdependentonthegivencodec combination)andthenwrittentothestreamusinganinstance oftheprovidedWriterclass. Intheotherdirection,dataisreadfromthestreamusinga Readerinstanceandthenreturnencodeddatatothecaller. """ #Optionalattributessetbythefilewrappersbelow data_encoding='unknown' file_encoding='unknown' def__init__(self,stream,encode,decode,Reader,Writer, errors='strict'): """CreatesaStreamRecoderinstancewhichimplementsatwo-way conversion:encodeanddecodeworkonthefrontend(the inputto.read()andoutputof.write())while ReaderandWriterworkonthebackend(readingand writingtothestream). Youcanusetheseobjectstodotransparentdirect recodingsfrome.g.latin-1toutf-8andback. streammustbeafile-likeobject. encode,decodemustadheretotheCodecinterface,Reader, Writermustbefactoryfunctionsorclassesprovidingthe StreamReader,StreamWriterinterfaceresp. encodeanddecodeareneededforthefrontendtranslation, ReaderandWriterforthebackendtranslation.Unicodeis usedasintermediateencoding. Errorhandlingisdoneinthesamewayasdefinedforthe StreamWriter/Readers. """ self.stream=stream self.encode=encode self.decode=decode self.reader=Reader(stream,errors) self.writer=Writer(stream,errors) self.errors=errors defread(self,size=-1): data=self.reader.read(size) data,bytesencoded=self.encode(data,self.errors) returndata defreadline(self,size=None): ifsizeisNone: data=self.reader.readline() else: data=self.reader.readline(size) data,bytesencoded=self.encode(data,self.errors) returndata defreadlines(self,sizehint=None): data=self.reader.read() data,bytesencoded=self.encode(data,self.errors) returndata.splitlines(1) defnext(self): """Returnthenextdecodedlinefromtheinputstream.""" data=self.reader.next() data,bytesencoded=self.encode(data,self.errors) returndata def__iter__(self): returnself defwrite(self,data): data,bytesdecoded=self.decode(data,self.errors) returnself.writer.write(data) defwritelines(self,list): data=''.join(list) data,bytesdecoded=self.decode(data,self.errors) returnself.writer.write(data) defreset(self): self.reader.reset() self.writer.reset() def__getattr__(self,name, getattr=getattr): """Inheritallothermethodsfromtheunderlyingstream. """ returngetattr(self.stream,name) def__enter__(self): returnself def__exit__(self,type,value,tb): self.stream.close() ###Shortcuts defopen(filename,mode='rb',encoding=None,errors='strict',buffering=1): """Openanencodedfileusingthegivenmodeandreturn awrappedversionprovidingtransparentencoding/decoding. Note:Thewrappedversionwillonlyaccepttheobjectformat definedbythecodecs,i.e.Unicodeobjectsformostbuiltin codecs.Outputisalsocodecdependentandwillusuallybe Unicodeaswell. Filesarealwaysopenedinbinarymode,evenifnobinarymode wasspecified.Thisisdonetoavoiddatalossduetoencodings using8-bitvalues.Thedefaultfilemodeis'rb'meaningto openthefileinbinaryreadmode. encodingspecifiestheencodingwhichistobeusedforthe file. errorsmaybegiventodefinetheerrorhandling.Itdefaults to'strict'whichcausesValueErrorstoberaisedincasean encodingerroroccurs. bufferinghasthesamemeaningasforthebuiltinopen()API. Itdefaultstolinebuffered. Thereturnedwrappedfileobjectprovidesanextraattribute .encodingwhichallowsqueryingtheusedencoding.This attributeisonlyavailableifanencodingwasspecifiedas parameter. """ ifencodingisnotNone: if'U'inmode: #Noautomaticconversionof'\n'isdoneonreadingandwriting mode=mode.strip().replace('U','') ifmode[:1]notinset('rwa'): mode='r'+mode if'b'notinmode: #Forceopeningofthefileinbinarymode mode=mode+'b' file=__builtin__.open(filename,mode,buffering) ifencodingisNone: returnfile info=lookup(encoding) srw=StreamReaderWriter(file,info.streamreader,info.streamwriter,errors) #Addattributestosimplifyintrospection srw.encoding=encoding returnsrw defEncodedFile(file,data_encoding,file_encoding=None,errors='strict'): """Returnawrappedversionoffilewhichprovidestransparent encodingtranslation. Stringswrittentothewrappedfileareinterpretedaccording tothegivendata_encodingandthenwrittentotheoriginal fileasstringusingfile_encoding.Theintermediateencoding willusuallybeUnicodebutdependsonthespecifiedcodecs. Stringsarereadfromthefileusingfile_encodingandthen passedbacktothecallerasstringusingdata_encoding. Iffile_encodingisnotgiven,itdefaultstodata_encoding. errorsmaybegiventodefinetheerrorhandling.Itdefaults to'strict'whichcausesValueErrorstoberaisedincasean encodingerroroccurs. Thereturnedwrappedfileobjectprovidestwoextraattributes .data_encodingand.file_encodingwhichreflectthegiven parametersofthesamename.Theattributescanbeusedfor introspectionbyPythonprograms. """ iffile_encodingisNone: file_encoding=data_encoding data_info=lookup(data_encoding) file_info=lookup(file_encoding) sr=StreamRecoder(file,data_info.encode,data_info.decode, file_info.streamreader,file_info.streamwriter,errors) #Addattributestosimplifyintrospection sr.data_encoding=data_encoding sr.file_encoding=file_encoding returnsr ###Helpersforcodeclookup defgetencoder(encoding): """Lookupupthecodecforthegivenencodingandreturn itsencoderfunction. RaisesaLookupErrorincasetheencodingcannotbefound. """ returnlookup(encoding).encode defgetdecoder(encoding): """Lookupupthecodecforthegivenencodingandreturn itsdecoderfunction. RaisesaLookupErrorincasetheencodingcannotbefound. """ returnlookup(encoding).decode defgetincrementalencoder(encoding): """Lookupupthecodecforthegivenencodingandreturn itsIncrementalEncoderclassorfactoryfunction. RaisesaLookupErrorincasetheencodingcannotbefound orthecodecsdoesn'tprovideanincrementalencoder. """ encoder=lookup(encoding).incrementalencoder ifencoderisNone: raiseLookupError(encoding) returnencoder defgetincrementaldecoder(encoding): """Lookupupthecodecforthegivenencodingandreturn itsIncrementalDecoderclassorfactoryfunction. RaisesaLookupErrorincasetheencodingcannotbefound orthecodecsdoesn'tprovideanincrementaldecoder. """ decoder=lookup(encoding).incrementaldecoder ifdecoderisNone: raiseLookupError(encoding) returndecoder defgetreader(encoding): """Lookupupthecodecforthegivenencodingandreturn itsStreamReaderclassorfactoryfunction. RaisesaLookupErrorincasetheencodingcannotbefound. """ returnlookup(encoding).streamreader defgetwriter(encoding): """Lookupupthecodecforthegivenencodingandreturn itsStreamWriterclassorfactoryfunction. RaisesaLookupErrorincasetheencodingcannotbefound. """ returnlookup(encoding).streamwriter defiterencode(iterator,encoding,errors='strict',**kwargs): """ Encodingiterator. EncodestheinputstringsfromtheiteratorusingaIncrementalEncoder. errorsandkwargsarepassedthroughtotheIncrementalEncoder constructor. """ encoder=getincrementalencoder(encoding)(errors,**kwargs) forinputiniterator: output=encoder.encode(input) ifoutput: yieldoutput output=encoder.encode("",True) ifoutput: yieldoutput defiterdecode(iterator,encoding,errors='strict',**kwargs): """ Decodingiterator. DecodestheinputstringsfromtheiteratorusingaIncrementalDecoder. errorsandkwargsarepassedthroughtotheIncrementalDecoder constructor. """ decoder=getincrementaldecoder(encoding)(errors,**kwargs) forinputiniterator: output=decoder.decode(input) ifoutput: yieldoutput output=decoder.decode("",True) ifoutput: yieldoutput ###Helpersforcharmap-basedcodecs defmake_identity_dict(rng): """make_identity_dict(rng)->dict Returnadictionarywhereelementsoftherngsequenceare mappedtothemselves. """ res={} foriinrng: res[i]=i returnres defmake_encoding_map(decoding_map): """Createsanencodingmapfromadecodingmap. Ifatargetmappinginthedecodingmapoccursmultiple times,thenthattargetismappedtoNone(undefinedmapping), causinganexceptionwhenencounteredbythecharmapcodec duringtranslation. Oneexamplewherethishappensiscp875.pywhichdecodes multiplecharacterto\u001a. """ m={} fork,vindecoding_map.items(): ifnotvinm: m[v]=k else: m[v]=None returnm ###errorhandlers try: strict_errors=lookup_error("strict") ignore_errors=lookup_error("ignore") replace_errors=lookup_error("replace") xmlcharrefreplace_errors=lookup_error("xmlcharrefreplace") backslashreplace_errors=lookup_error("backslashreplace") exceptLookupError: #In--disable-unicodebuilds,theseerrorhandleraremissing strict_errors=None ignore_errors=None replace_errors=None xmlcharrefreplace_errors=None backslashreplace_errors=None #Tellmodulefinderthatusingcodecsprobablyneedstheencodings #package _false=0 if_false: importencodings ###Tests if__name__=='__main__': #MakestdouttranslateLatin-1outputintoUTF-8output sys.stdout=EncodedFile(sys.stdout,'latin-1','utf-8') #HavestdintranslateLatin-1inputintoUTF-8input sys.stdin=EncodedFile(sys.stdin,'utf-8','latin-1') Copylines Copypermalink Viewgitblame Referenceinnewissue Go Youcan’tperformthatactionatthistime. Yousignedinwithanothertaborwindow.Reloadtorefreshyoursession. Yousignedoutinanothertaborwindow.Reloadtorefreshyoursession.