cpython/codecs.c at main · python/cpython - GitHub

文章推薦指數: 80 %
投票人數:10人

Python Codec Registry and support functions. Written by Marc-Andre Lemburg ([email protected]). Copyright (c) Corporation for National Research Initiatives. Skiptocontent {{message}} python / cpython Public Notifications Fork 24.4k Star 48k Code Issues 5k+ Pullrequests 1.5k Actions Projects 27 Security Insights More Code Issues Pullrequests Actions Projects Security Insights Permalink main Branches Tags Couldnotloadbranches Nothingtoshow {{refName}} default Couldnotloadtags Nothingtoshow {{refName}} default cpython/Python/codecs.c Gotofile Gotofile T Gotoline L Copypath Copypermalink Thiscommitdoesnotbelongtoanybranchonthisrepository,andmaybelongtoaforkoutsideoftherepository.     Cannotretrievecontributorsatthistime 1536lines(1379sloc) 45KB Raw Blame Editthisfile E OpeninGitHubDesktop OpenwithDesktop Viewraw Viewblame ThisfilecontainsbidirectionalUnicodetextthatmaybeinterpretedorcompileddifferentlythanwhatappearsbelow.Toreview,openthefileinaneditorthatrevealshiddenUnicodecharacters. LearnmoreaboutbidirectionalUnicodecharacters Showhiddencharacters /*------------------------------------------------------------------------ PythonCodecRegistryandsupportfunctions WrittenbyMarc-AndreLemburg([email protected]). Copyright(c)CorporationforNationalResearchInitiatives. ------------------------------------------------------------------------*/ #include"Python.h" #include"pycore_call.h"//_PyObject_CallNoArgs() #include"pycore_interp.h"//PyInterpreterState.codec_search_path #include"pycore_pystate.h"//_PyInterpreterState_GET() #include"pycore_ucnhash.h"//_PyUnicode_Name_CAPI #include constchar*Py_hexdigits="0123456789abcdef"; /*---CodecRegistry-----------------------------------------------------*/ /*Importthestandardencodingspackagewhichwillregisterthefirst codecsearchfunction. ThisisdoneinalazywaysothattheUnicodeimplementationdoes notdowngradestartuptimeofscriptsnotneedingit. ImportErrorsaresilentlyignoredbythisfunction.Onlyonetryis made. */ staticint_PyCodecRegistry_Init(void);/*Forward*/ intPyCodec_Register(PyObject*search_function) { PyInterpreterState*interp=_PyInterpreterState_GET(); if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init()) gotoonError; if(search_function==NULL){ PyErr_BadArgument(); gotoonError; } if(!PyCallable_Check(search_function)){ PyErr_SetString(PyExc_TypeError,"argumentmustbecallable"); gotoonError; } returnPyList_Append(interp->codec_search_path,search_function); onError: return-1; } int PyCodec_Unregister(PyObject*search_function) { PyInterpreterState*interp=PyInterpreterState_Get(); PyObject*codec_search_path=interp->codec_search_path; /*Donothingifcodec_search_pathisnotcreatedyetorwascleared.*/ if(codec_search_path==NULL){ return0; } assert(PyList_CheckExact(codec_search_path)); Py_ssize_tn=PyList_GET_SIZE(codec_search_path); for(Py_ssize_ti=0;icodec_search_cache!=NULL){ assert(PyDict_CheckExact(interp->codec_search_cache)); PyDict_Clear(interp->codec_search_cache); } returnPyList_SetSlice(codec_search_path,i,i+1,NULL); } } return0; } externint_Py_normalize_encoding(constchar*,char*,size_t); /*ConvertastringtoanormalizedPythonstring(decodedfromUTF-8):allcharactersare convertedtolowercase,spacesandhyphensarereplacedwithunderscores.*/ static PyObject*normalizestring(constchar*string) { size_tlen=strlen(string); char*encoding; PyObject*v; if(len>PY_SSIZE_T_MAX){ PyErr_SetString(PyExc_OverflowError,"stringistoolarge"); returnNULL; } encoding=PyMem_Malloc(len+1); if(encoding==NULL) returnPyErr_NoMemory(); if(!_Py_normalize_encoding(string,encoding,len+1)) { PyErr_SetString(PyExc_RuntimeError,"_Py_normalize_encoding()failed"); PyMem_Free(encoding); returnNULL; } v=PyUnicode_FromString(encoding); PyMem_Free(encoding); returnv; } /*Lookupthegivenencodingandreturnatupleprovidingthecodec facilities. Theencodingstringislookedupconvertedtoalllower-case characters.Thismakesencodingslookedupthroughthismechanism effectivelycase-insensitive. Ifnocodecisfound,aLookupErrorissetandNULLreturned. Assideeffect,thistriestoloadtheencodingspackage,ifnot yetdone.Thisispartofthelazyloadstrategyfortheencodings package. */ PyObject*_PyCodec_Lookup(constchar*encoding) { if(encoding==NULL){ PyErr_BadArgument(); returnNULL; } PyInterpreterState*interp=_PyInterpreterState_GET(); if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init()){ returnNULL; } /*ConverttheencodingtoanormalizedPythonstring:all charactersareconvertedtolowercase,spacesandhyphensare replacedwithunderscores.*/ PyObject*v=normalizestring(encoding); if(v==NULL){ returnNULL; } PyUnicode_InternInPlace(&v); /*First,trytolookupthenameintheregistrydictionary*/ PyObject*result=PyDict_GetItemWithError(interp->codec_search_cache,v); if(result!=NULL){ Py_INCREF(result); Py_DECREF(v); returnresult; } elseif(PyErr_Occurred()){ gotoonError; } /*Next,scanthesearchfunctionsinorderofregistration*/ constPy_ssize_tlen=PyList_Size(interp->codec_search_path); if(len<0) gotoonError; if(len==0){ PyErr_SetString(PyExc_LookupError, "nocodecsearchfunctionsregistered:" "can'tfindencoding"); gotoonError; } Py_ssize_ti; for(i=0;icodec_search_path,i); if(func==NULL) gotoonError; result=PyObject_CallOneArg(func,v); if(result==NULL) gotoonError; if(result==Py_None){ Py_DECREF(result); continue; } if(!PyTuple_Check(result)||PyTuple_GET_SIZE(result)!=4){ PyErr_SetString(PyExc_TypeError, "codecsearchfunctionsmustreturn4-tuples"); Py_DECREF(result); gotoonError; } break; } if(i==len){ /*XXXPerhapsweshouldcachemissestoo?*/ PyErr_Format(PyExc_LookupError, "unknownencoding:%s",encoding); gotoonError; } /*Cacheandreturntheresult*/ if(PyDict_SetItem(interp->codec_search_cache,v,result)<0){ Py_DECREF(result); gotoonError; } Py_DECREF(v); returnresult; onError: Py_DECREF(v); returnNULL; } /*CodecregistryencodingcheckAPI.*/ intPyCodec_KnownEncoding(constchar*encoding) { PyObject*codecs; codecs=_PyCodec_Lookup(encoding); if(!codecs){ PyErr_Clear(); return0; } else{ Py_DECREF(codecs); return1; } } static PyObject*args_tuple(PyObject*object, constchar*errors) { PyObject*args; args=PyTuple_New(1+(errors!=NULL)); if(args==NULL) returnNULL; Py_INCREF(object); PyTuple_SET_ITEM(args,0,object); if(errors){ PyObject*v; v=PyUnicode_FromString(errors); if(v==NULL){ Py_DECREF(args); returnNULL; } PyTuple_SET_ITEM(args,1,v); } returnargs; } /*Helperfunctiontogetacodecitem*/ static PyObject*codec_getitem(constchar*encoding,intindex) { PyObject*codecs; PyObject*v; codecs=_PyCodec_Lookup(encoding); if(codecs==NULL) returnNULL; v=PyTuple_GET_ITEM(codecs,index); Py_DECREF(codecs); Py_INCREF(v); returnv; } /*Helperfunctionstocreateanincrementalcodec.*/ static PyObject*codec_makeincrementalcodec(PyObject*codec_info, constchar*errors, constchar*attrname) { PyObject*ret,*inccodec; inccodec=PyObject_GetAttrString(codec_info,attrname); if(inccodec==NULL) returnNULL; if(errors) ret=PyObject_CallFunction(inccodec,"s",errors); else ret=_PyObject_CallNoArgs(inccodec); Py_DECREF(inccodec); returnret; } static PyObject*codec_getincrementalcodec(constchar*encoding, constchar*errors, constchar*attrname) { PyObject*codec_info,*ret; codec_info=_PyCodec_Lookup(encoding); if(codec_info==NULL) returnNULL; ret=codec_makeincrementalcodec(codec_info,errors,attrname); Py_DECREF(codec_info); returnret; } /*Helperfunctiontocreateastreamcodec.*/ static PyObject*codec_getstreamcodec(constchar*encoding, PyObject*stream, constchar*errors, constintindex) { PyObject*codecs,*streamcodec,*codeccls; codecs=_PyCodec_Lookup(encoding); if(codecs==NULL) returnNULL; codeccls=PyTuple_GET_ITEM(codecs,index); if(errors!=NULL) streamcodec=PyObject_CallFunction(codeccls,"Os",stream,errors); else streamcodec=PyObject_CallOneArg(codeccls,stream); Py_DECREF(codecs); returnstreamcodec; } /*Helperstoworkwiththeresultof_PyCodec_Lookup */ PyObject*_PyCodecInfo_GetIncrementalDecoder(PyObject*codec_info, constchar*errors) { returncodec_makeincrementalcodec(codec_info,errors, "incrementaldecoder"); } PyObject*_PyCodecInfo_GetIncrementalEncoder(PyObject*codec_info, constchar*errors) { returncodec_makeincrementalcodec(codec_info,errors, "incrementalencoder"); } /*ConvenienceAPIstoquerytheCodecregistry. AllAPIsreturnacodecobjectwithincrementedrefcount. */ PyObject*PyCodec_Encoder(constchar*encoding) { returncodec_getitem(encoding,0); } PyObject*PyCodec_Decoder(constchar*encoding) { returncodec_getitem(encoding,1); } PyObject*PyCodec_IncrementalEncoder(constchar*encoding, constchar*errors) { returncodec_getincrementalcodec(encoding,errors,"incrementalencoder"); } PyObject*PyCodec_IncrementalDecoder(constchar*encoding, constchar*errors) { returncodec_getincrementalcodec(encoding,errors,"incrementaldecoder"); } PyObject*PyCodec_StreamReader(constchar*encoding, PyObject*stream, constchar*errors) { returncodec_getstreamcodec(encoding,stream,errors,2); } PyObject*PyCodec_StreamWriter(constchar*encoding, PyObject*stream, constchar*errors) { returncodec_getstreamcodec(encoding,stream,errors,3); } /*Helperthattriestoensurethereportedexceptionchainindicatesthe *codecthatwasinvokedtotriggerthefailurewithoutchangingthetype *oftheexceptionraised. */ staticvoid wrap_codec_error(constchar*operation, constchar*encoding) { /*TrySetFromCausewillreplacetheactiveexceptionwithasuitably *updatedcloneifitcan,otherwiseitwillleavetheoriginal *exceptionalone. */ _PyErr_TrySetFromCause("%swith'%s'codecfailed", operation,encoding); } /*Encodeanobject(e.g.aUnicodeobject)usingthegivenencoding andreturntheresultingencodedobject(usuallyaPythonstring). errorsispassedtotheencoderfactoryasargumentifnon-NULL.*/ staticPyObject* _PyCodec_EncodeInternal(PyObject*object, PyObject*encoder, constchar*encoding, constchar*errors) { PyObject*args=NULL,*result=NULL; PyObject*v=NULL; args=args_tuple(object,errors); if(args==NULL) gotoonError; result=PyObject_Call(encoder,args,NULL); if(result==NULL){ wrap_codec_error("encoding",encoding); gotoonError; } if(!PyTuple_Check(result)|| PyTuple_GET_SIZE(result)!=2){ PyErr_SetString(PyExc_TypeError, "encodermustreturnatuple(object,integer)"); gotoonError; } v=PyTuple_GET_ITEM(result,0); Py_INCREF(v); /*Wedon'tcheckorusethesecond(integer)entry.*/ Py_DECREF(args); Py_DECREF(encoder); Py_DECREF(result); returnv; onError: Py_XDECREF(result); Py_XDECREF(args); Py_XDECREF(encoder); returnNULL; } /*Decodeanobject(usuallyaPythonstring)usingthegivenencoding andreturnanequivalentobject(e.g.aUnicodeobject). errorsispassedtothedecoderfactoryasargumentifnon-NULL.*/ staticPyObject* _PyCodec_DecodeInternal(PyObject*object, PyObject*decoder, constchar*encoding, constchar*errors) { PyObject*args=NULL,*result=NULL; PyObject*v; args=args_tuple(object,errors); if(args==NULL) gotoonError; result=PyObject_Call(decoder,args,NULL); if(result==NULL){ wrap_codec_error("decoding",encoding); gotoonError; } if(!PyTuple_Check(result)|| PyTuple_GET_SIZE(result)!=2){ PyErr_SetString(PyExc_TypeError, "decodermustreturnatuple(object,integer)"); gotoonError; } v=PyTuple_GET_ITEM(result,0); Py_INCREF(v); /*Wedon'tcheckorusethesecond(integer)entry.*/ Py_DECREF(args); Py_DECREF(decoder); Py_DECREF(result); returnv; onError: Py_XDECREF(args); Py_XDECREF(decoder); Py_XDECREF(result); returnNULL; } /*Genericencoding/decodingAPI*/ PyObject*PyCodec_Encode(PyObject*object, constchar*encoding, constchar*errors) { PyObject*encoder; encoder=PyCodec_Encoder(encoding); if(encoder==NULL) returnNULL; return_PyCodec_EncodeInternal(object,encoder,encoding,errors); } PyObject*PyCodec_Decode(PyObject*object, constchar*encoding, constchar*errors) { PyObject*decoder; decoder=PyCodec_Decoder(encoding); if(decoder==NULL) returnNULL; return_PyCodec_DecodeInternal(object,decoder,encoding,errors); } /*Textencoding/decodingAPI*/ PyObject*_PyCodec_LookupTextEncoding(constchar*encoding, constchar*alternate_command) { PyObject*codec; PyObject*attr; intis_text_codec; codec=_PyCodec_Lookup(encoding); if(codec==NULL) returnNULL; /*Backwardscompatibility:assumeanyrawtupledescribesatext *encoding,andthesameforanythinglackingtheprivate *attribute. */ if(!PyTuple_CheckExact(codec)){ if(_PyObject_LookupAttr(codec,&_Py_ID(_is_text_encoding),&attr)<0){ Py_DECREF(codec); returnNULL; } if(attr!=NULL){ is_text_codec=PyObject_IsTrue(attr); Py_DECREF(attr); if(is_text_codec<=0){ Py_DECREF(codec); if(!is_text_codec) PyErr_Format(PyExc_LookupError, "'%.400s'isnotatextencoding;" "use%stohandlearbitrarycodecs", encoding,alternate_command); returnNULL; } } } /*Thisappearstobeavalidtextencoding*/ returncodec; } static PyObject*codec_getitem_checked(constchar*encoding, constchar*alternate_command, intindex) { PyObject*codec; PyObject*v; codec=_PyCodec_LookupTextEncoding(encoding,alternate_command); if(codec==NULL) returnNULL; v=PyTuple_GET_ITEM(codec,index); Py_INCREF(v); Py_DECREF(codec); returnv; } staticPyObject*_PyCodec_TextEncoder(constchar*encoding) { returncodec_getitem_checked(encoding,"codecs.encode()",0); } staticPyObject*_PyCodec_TextDecoder(constchar*encoding) { returncodec_getitem_checked(encoding,"codecs.decode()",1); } PyObject*_PyCodec_EncodeText(PyObject*object, constchar*encoding, constchar*errors) { PyObject*encoder; encoder=_PyCodec_TextEncoder(encoding); if(encoder==NULL) returnNULL; return_PyCodec_EncodeInternal(object,encoder,encoding,errors); } PyObject*_PyCodec_DecodeText(PyObject*object, constchar*encoding, constchar*errors) { PyObject*decoder; decoder=_PyCodec_TextDecoder(encoding); if(decoder==NULL) returnNULL; return_PyCodec_DecodeInternal(object,decoder,encoding,errors); } /*Registertheerrorhandlingcallbackfunctionerrorunderthename name.Thisfunctionwillbecalledbythecodecwhenitencounters anunencodablecharacters/undecodablebytesanddoesn'tknowthe callbackname,whennameisspecifiedastheerrorparameter inthecalltotheencode/decodefunction. Return0onsuccess,-1onerror*/ intPyCodec_RegisterError(constchar*name,PyObject*error) { PyInterpreterState*interp=_PyInterpreterState_GET(); if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init()) return-1; if(!PyCallable_Check(error)){ PyErr_SetString(PyExc_TypeError,"handlermustbecallable"); return-1; } returnPyDict_SetItemString(interp->codec_error_registry, name,error); } /*Lookuptheerrorhandlingcallbackfunctionregisteredunderthe nameerror.AsaspecialcaseNULLcanbepassed,inwhichcase theerrorhandlingcallbackforstrictencodingwillbereturned.*/ PyObject*PyCodec_LookupError(constchar*name) { PyObject*handler=NULL; PyInterpreterState*interp=_PyInterpreterState_GET(); if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init()) returnNULL; if(name==NULL) name="strict"; handler=_PyDict_GetItemStringWithError(interp->codec_error_registry,name); if(handler){ Py_INCREF(handler); } elseif(!PyErr_Occurred()){ PyErr_Format(PyExc_LookupError,"unknownerrorhandlername'%.400s'",name); } returnhandler; } staticvoidwrong_exception_type(PyObject*exc) { PyErr_Format(PyExc_TypeError, "don'tknowhowtohandle%.200sinerrorcallback", Py_TYPE(exc)->tp_name); } PyObject*PyCodec_StrictErrors(PyObject*exc) { if(PyExceptionInstance_Check(exc)) PyErr_SetObject(PyExceptionInstance_Class(exc),exc); else PyErr_SetString(PyExc_TypeError,"codecmustpassexceptioninstance"); returnNULL; } PyObject*PyCodec_IgnoreErrors(PyObject*exc) { Py_ssize_tend; if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; } elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeDecodeError)){ if(PyUnicodeDecodeError_GetEnd(exc,&end)) returnNULL; } elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeTranslateError)){ if(PyUnicodeTranslateError_GetEnd(exc,&end)) returnNULL; } else{ wrong_exception_type(exc); returnNULL; } returnPy_BuildValue("(Nn)",PyUnicode_New(0,0),end); } PyObject*PyCodec_ReplaceErrors(PyObject*exc) { Py_ssize_tstart,end,i,len; if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ PyObject*res; Py_UCS1*outp; if(PyUnicodeEncodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; len=end-start; res=PyUnicode_New(len,'?'); if(res==NULL) returnNULL; assert(PyUnicode_KIND(res)==PyUnicode_1BYTE_KIND); outp=PyUnicode_1BYTE_DATA(res); for(i=0;iPY_SSIZE_T_MAX/(2+7+1)) end=start+PY_SSIZE_T_MAX/(2+7+1); for(i=start,ressize=0;i0){ *outp++='0'+ch/base; ch%=base; base/=10; } *outp++=';'; } assert(_PyUnicode_CheckConsistency(res,1)); restuple=Py_BuildValue("(Nn)",res,end); Py_DECREF(object); returnrestuple; } else{ wrong_exception_type(exc); returnNULL; } } PyObject*PyCodec_BackslashReplaceErrors(PyObject*exc) { PyObject*object; Py_ssize_ti; Py_ssize_tstart; Py_ssize_tend; PyObject*res; Py_UCS1*outp; intressize; Py_UCS4c; if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeDecodeError)){ constunsignedchar*p; if(PyUnicodeDecodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeDecodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeDecodeError_GetObject(exc))) returnNULL; p=(constunsignedchar*)PyBytes_AS_STRING(object); res=PyUnicode_New(4*(end-start),127); if(res==NULL){ Py_DECREF(object); returnNULL; } outp=PyUnicode_1BYTE_DATA(res); for(i=start;i>4)&0xf]; outp[3]=Py_hexdigits[c&0xf]; } assert(_PyUnicode_CheckConsistency(res,1)); Py_DECREF(object); returnPy_BuildValue("(Nn)",res,end); } if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ if(PyUnicodeEncodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeEncodeError_GetObject(exc))) returnNULL; } elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeTranslateError)){ if(PyUnicodeTranslateError_GetStart(exc,&start)) returnNULL; if(PyUnicodeTranslateError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeTranslateError_GetObject(exc))) returnNULL; } else{ wrong_exception_type(exc); returnNULL; } if(end-start>PY_SSIZE_T_MAX/(1+1+8)) end=start+PY_SSIZE_T_MAX/(1+1+8); for(i=start,ressize=0;i=0x10000){ ressize+=1+1+8; } elseif(c>=0x100){ ressize+=1+1+4; } else ressize+=1+1+2; } res=PyUnicode_New(ressize,127); if(res==NULL){ Py_DECREF(object); returnNULL; } outp=PyUnicode_1BYTE_DATA(res); for(i=start;i=0x00010000){ *outp++='U'; *outp++=Py_hexdigits[(c>>28)&0xf]; *outp++=Py_hexdigits[(c>>24)&0xf]; *outp++=Py_hexdigits[(c>>20)&0xf]; *outp++=Py_hexdigits[(c>>16)&0xf]; *outp++=Py_hexdigits[(c>>12)&0xf]; *outp++=Py_hexdigits[(c>>8)&0xf]; } elseif(c>=0x100){ *outp++='u'; *outp++=Py_hexdigits[(c>>12)&0xf]; *outp++=Py_hexdigits[(c>>8)&0xf]; } else *outp++='x'; *outp++=Py_hexdigits[(c>>4)&0xf]; *outp++=Py_hexdigits[c&0xf]; } assert(_PyUnicode_CheckConsistency(res,1)); Py_DECREF(object); returnPy_BuildValue("(Nn)",res,end); } static_PyUnicode_Name_CAPI*ucnhash_capi=NULL; PyObject*PyCodec_NameReplaceErrors(PyObject*exc) { if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ PyObject*restuple; PyObject*object; Py_ssize_ti; Py_ssize_tstart; Py_ssize_tend; PyObject*res; Py_UCS1*outp; Py_ssize_tressize; intreplsize; Py_UCS4c; charbuffer[256];/*NAME_MAXLEN*/ if(PyUnicodeEncodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeEncodeError_GetObject(exc))) returnNULL; if(!ucnhash_capi){ /*loadtheunicodedatamodule*/ ucnhash_capi=(_PyUnicode_Name_CAPI*)PyCapsule_Import( PyUnicodeData_CAPSULE_NAME,1); if(!ucnhash_capi){ returnNULL; } } for(i=start,ressize=0;igetname(c,buffer,sizeof(buffer),1)){ replsize=1+1+1+(int)strlen(buffer)+1; } elseif(c>=0x10000){ replsize=1+1+8; } elseif(c>=0x100){ replsize=1+1+4; } else replsize=1+1+2; if(ressize>PY_SSIZE_T_MAX-replsize) break; ressize+=replsize; } end=i; res=PyUnicode_New(ressize,127); if(res==NULL) returnNULL; for(i=start,outp=PyUnicode_1BYTE_DATA(res); igetname(c,buffer,sizeof(buffer),1)){ *outp++='N'; *outp++='{'; strcpy((char*)outp,buffer); outp+=strlen(buffer); *outp++='}'; continue; } if(c>=0x00010000){ *outp++='U'; *outp++=Py_hexdigits[(c>>28)&0xf]; *outp++=Py_hexdigits[(c>>24)&0xf]; *outp++=Py_hexdigits[(c>>20)&0xf]; *outp++=Py_hexdigits[(c>>16)&0xf]; *outp++=Py_hexdigits[(c>>12)&0xf]; *outp++=Py_hexdigits[(c>>8)&0xf]; } elseif(c>=0x100){ *outp++='u'; *outp++=Py_hexdigits[(c>>12)&0xf]; *outp++=Py_hexdigits[(c>>8)&0xf]; } else *outp++='x'; *outp++=Py_hexdigits[(c>>4)&0xf]; *outp++=Py_hexdigits[c&0xf]; } assert(outp==PyUnicode_1BYTE_DATA(res)+ressize); assert(_PyUnicode_CheckConsistency(res,1)); restuple=Py_BuildValue("(Nn)",res,end); Py_DECREF(object); returnrestuple; } else{ wrong_exception_type(exc); returnNULL; } } #defineENC_UNKNOWN-1 #defineENC_UTF80 #defineENC_UTF16BE1 #defineENC_UTF16LE2 #defineENC_UTF32BE3 #defineENC_UTF32LE4 staticint get_standard_encoding(constchar*encoding,int*bytelength) { if(Py_TOLOWER(encoding[0])=='u'&& Py_TOLOWER(encoding[1])=='t'&& Py_TOLOWER(encoding[2])=='f'){ encoding+=3; if(*encoding=='-'||*encoding=='_') encoding++; if(encoding[0]=='8'&&encoding[1]=='\0'){ *bytelength=3; returnENC_UTF8; } elseif(encoding[0]=='1'&&encoding[1]=='6'){ encoding+=2; *bytelength=2; if(*encoding=='\0'){ #ifdefWORDS_BIGENDIAN returnENC_UTF16BE; #else returnENC_UTF16LE; #endif } if(*encoding=='-'||*encoding=='_') encoding++; if(Py_TOLOWER(encoding[1])=='e'&&encoding[2]=='\0'){ if(Py_TOLOWER(encoding[0])=='b') returnENC_UTF16BE; if(Py_TOLOWER(encoding[0])=='l') returnENC_UTF16LE; } } elseif(encoding[0]=='3'&&encoding[1]=='2'){ encoding+=2; *bytelength=4; if(*encoding=='\0'){ #ifdefWORDS_BIGENDIAN returnENC_UTF32BE; #else returnENC_UTF32LE; #endif } if(*encoding=='-'||*encoding=='_') encoding++; if(Py_TOLOWER(encoding[1])=='e'&&encoding[2]=='\0'){ if(Py_TOLOWER(encoding[0])=='b') returnENC_UTF32BE; if(Py_TOLOWER(encoding[0])=='l') returnENC_UTF32LE; } } } elseif(strcmp(encoding,"CP_UTF8")==0){ *bytelength=3; returnENC_UTF8; } returnENC_UNKNOWN; } /*Thishandlerisdeclaredstaticuntilsomeonedemonstrates aneedtocallitdirectly.*/ staticPyObject* PyCodec_SurrogatePassErrors(PyObject*exc) { PyObject*restuple; PyObject*object; PyObject*encode; constchar*encoding; intcode; intbytelength; Py_ssize_ti; Py_ssize_tstart; Py_ssize_tend; PyObject*res; if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ unsignedchar*outp; if(PyUnicodeEncodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeEncodeError_GetObject(exc))) returnNULL; if(!(encode=PyUnicodeEncodeError_GetEncoding(exc))){ Py_DECREF(object); returnNULL; } if(!(encoding=PyUnicode_AsUTF8(encode))){ Py_DECREF(object); Py_DECREF(encode); returnNULL; } code=get_standard_encoding(encoding,&bytelength); Py_DECREF(encode); if(code==ENC_UNKNOWN){ /*Notsupported,failwithoriginalexception*/ PyErr_SetObject(PyExceptionInstance_Class(exc),exc); Py_DECREF(object); returnNULL; } if(end-start>PY_SSIZE_T_MAX/bytelength) end=start+PY_SSIZE_T_MAX/bytelength; res=PyBytes_FromStringAndSize(NULL,bytelength*(end-start)); if(!res){ Py_DECREF(object); returnNULL; } outp=(unsignedchar*)PyBytes_AsString(res); for(i=start;i>12)); *outp++=(unsignedchar)(0x80|((ch>>6)&0x3f)); *outp++=(unsignedchar)(0x80|(ch&0x3f)); break; caseENC_UTF16LE: *outp++=(unsignedchar)ch; *outp++=(unsignedchar)(ch>>8); break; caseENC_UTF16BE: *outp++=(unsignedchar)(ch>>8); *outp++=(unsignedchar)ch; break; caseENC_UTF32LE: *outp++=(unsignedchar)ch; *outp++=(unsignedchar)(ch>>8); *outp++=(unsignedchar)(ch>>16); *outp++=(unsignedchar)(ch>>24); break; caseENC_UTF32BE: *outp++=(unsignedchar)(ch>>24); *outp++=(unsignedchar)(ch>>16); *outp++=(unsignedchar)(ch>>8); *outp++=(unsignedchar)ch; break; } } restuple=Py_BuildValue("(On)",res,end); Py_DECREF(res); Py_DECREF(object); returnrestuple; } elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeDecodeError)){ constunsignedchar*p; Py_UCS4ch=0; if(PyUnicodeDecodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeDecodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeDecodeError_GetObject(exc))) returnNULL; p=(constunsignedchar*)PyBytes_AS_STRING(object); if(!(encode=PyUnicodeDecodeError_GetEncoding(exc))){ Py_DECREF(object); returnNULL; } if(!(encoding=PyUnicode_AsUTF8(encode))){ Py_DECREF(object); Py_DECREF(encode); returnNULL; } code=get_standard_encoding(encoding,&bytelength); Py_DECREF(encode); if(code==ENC_UNKNOWN){ /*Notsupported,failwithoriginalexception*/ PyErr_SetObject(PyExceptionInstance_Class(exc),exc); Py_DECREF(object); returnNULL; } /*Trydecodingasinglesurrogatecharacter.If therearemore,letthecodeccallusagain.*/ p+=start; if(PyBytes_GET_SIZE(object)-start>=bytelength){ switch(code){ caseENC_UTF8: if((p[0]&0xf0)==0xe0&& (p[1]&0xc0)==0x80&& (p[2]&0xc0)==0x80){ /*it'sathree-bytecode*/ ch=((p[0]&0x0f)<<12)+((p[1]&0x3f)<<6)+(p[2]&0x3f); } break; caseENC_UTF16LE: ch=p[1]<<8|p[0]; break; caseENC_UTF16BE: ch=p[0]<<8|p[1]; break; caseENC_UTF32LE: ch=(p[3]<<24)|(p[2]<<16)|(p[1]<<8)|p[0]; break; caseENC_UTF32BE: ch=(p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; break; } } Py_DECREF(object); if(!Py_UNICODE_IS_SURROGATE(ch)){ /*it'snotasurrogate-fail*/ PyErr_SetObject(PyExceptionInstance_Class(exc),exc); returnNULL; } res=PyUnicode_FromOrdinal(ch); if(res==NULL) returnNULL; returnPy_BuildValue("(Nn)",res,start+bytelength); } else{ wrong_exception_type(exc); returnNULL; } } staticPyObject* PyCodec_SurrogateEscapeErrors(PyObject*exc) { PyObject*restuple; PyObject*object; Py_ssize_ti; Py_ssize_tstart; Py_ssize_tend; PyObject*res; if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){ char*outp; if(PyUnicodeEncodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeEncodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeEncodeError_GetObject(exc))) returnNULL; res=PyBytes_FromStringAndSize(NULL,end-start); if(!res){ Py_DECREF(object); returnNULL; } outp=PyBytes_AsString(res); for(i=start;i0xdcff){ /*NotaUTF-8bsurrogate,failwithoriginalexception*/ PyErr_SetObject(PyExceptionInstance_Class(exc),exc); Py_DECREF(res); Py_DECREF(object); returnNULL; } *outp++=ch-0xdc00; } restuple=Py_BuildValue("(On)",res,end); Py_DECREF(res); Py_DECREF(object); returnrestuple; } elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeDecodeError)){ PyObject*str; constunsignedchar*p; Py_UCS2ch[4];/*decodeupto4badbytes.*/ intconsumed=0; if(PyUnicodeDecodeError_GetStart(exc,&start)) returnNULL; if(PyUnicodeDecodeError_GetEnd(exc,&end)) returnNULL; if(!(object=PyUnicodeDecodeError_GetObject(exc))) returnNULL; p=(constunsignedchar*)PyBytes_AS_STRING(object); while(consumed<4&&consumedcodec_search_path!=NULL) return0; interp->codec_search_path=PyList_New(0); if(interp->codec_search_path==NULL){ return-1; } interp->codec_search_cache=PyDict_New(); if(interp->codec_search_cache==NULL){ return-1; } interp->codec_error_registry=PyDict_New(); if(interp->codec_error_registry==NULL){ return-1; } for(size_ti=0;icodecs_initialized=1; return0; } Copylines Copypermalink Viewgitblame Referenceinnewissue Go Youcan’tperformthatactionatthistime. Yousignedinwithanothertaborwindow.Reloadtorefreshyoursession. Yousignedoutinanothertaborwindow.Reloadtorefreshyoursession.



請為這篇文章評分?