Python Codec Registry and support functions. Written by Marc-Andre Lemburg ([email protected]). Copyright (c) Corporation for National Research Initiatives.
Skiptocontent
{{message}}
python
/
cpython
Public
Notifications
Fork
24.4k
Star
48k
Code
Issues
5k+
Pullrequests
1.5k
Actions
Projects
27
Security
Insights
More
Code
Issues
Pullrequests
Actions
Projects
Security
Insights
Permalink
main
Branches
Tags
Couldnotloadbranches
Nothingtoshow
{{refName}}
default
Couldnotloadtags
Nothingtoshow
{{refName}}
default
cpython/Python/codecs.c
Gotofile
Gotofile
T
Gotoline
L
Copypath
Copypermalink
Thiscommitdoesnotbelongtoanybranchonthisrepository,andmaybelongtoaforkoutsideoftherepository.
Cannotretrievecontributorsatthistime
1536lines(1379sloc)
45KB
Raw
Blame
Editthisfile
E
OpeninGitHubDesktop
OpenwithDesktop
Viewraw
Viewblame
ThisfilecontainsbidirectionalUnicodetextthatmaybeinterpretedorcompileddifferentlythanwhatappearsbelow.Toreview,openthefileinaneditorthatrevealshiddenUnicodecharacters.
LearnmoreaboutbidirectionalUnicodecharacters
Showhiddencharacters
/* ------------------------------------------------------------------------
   Python Codec Registry and support functions
   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Copyright (c) Corporation for National Research Initiatives.
   ------------------------------------------------------------------------ */
#include"Python.h"
#include"pycore_call.h"//_PyObject_CallNoArgs()
#include"pycore_interp.h"//PyInterpreterState.codec_search_path
#include"pycore_pystate.h"//_PyInterpreterState_GET()
#include"pycore_ucnhash.h"//_PyUnicode_Name_CAPI
#include
/* Lowercase hexadecimal digits, indexed by nibble value (0-15). */
const char *Py_hexdigits = "0123456789abcdef";
/* --- Codec Registry ----------------------------------------------------- */
/* Import the standard encodings package which will register the first
   codec search function.
   This is done in a lazy way so that the Unicode implementation does
   not downgrade startup time of scripts not needing it.
   ImportErrors are silently ignored by this function. Only one try is
   made.
*/
staticint_PyCodecRegistry_Init(void);/*Forward*/
/* Register a new codec search function.

   The registry is initialized first if the search path does not exist yet.
   search_function must be a non-NULL callable; it is appended to the
   interpreter's codec search path.

   Returns the result of PyList_Append() on success, -1 on failure. */
int PyCodec_Register(PyObject *search_function)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();

    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
        return -1;
    }
    if (search_function == NULL) {
        PyErr_BadArgument();
        return -1;
    }
    if (!PyCallable_Check(search_function)) {
        PyErr_SetString(PyExc_TypeError, "argument must be callable");
        return -1;
    }
    return PyList_Append(interp->codec_search_path, search_function);
}
/* Remove a codec search function from the search path.

   Returns 0 when the search path does not exist or search_function is not
   in it. When the function is found, the lookup cache (if any) is cleared
   and the result of removing the list entry is returned. */
int
PyCodec_Unregister(PyObject *search_function)
{
    PyInterpreterState *interp = PyInterpreterState_Get();
    PyObject *codec_search_path = interp->codec_search_path;
    /* Do nothing if codec_search_path is not created yet or was cleared. */
    if (codec_search_path == NULL) {
        return 0;
    }

    assert(PyList_CheckExact(codec_search_path));
    Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
    for (Py_ssize_t i = 0; i < n; i++) {
        PyObject *item = PyList_GET_ITEM(codec_search_path, i);
        if (item == search_function) {
            /* Cached lookups may have come from the function being removed,
               so drop the whole cache before unlinking it. */
            if (interp->codec_search_cache != NULL) {
                assert(PyDict_CheckExact(interp->codec_search_cache));
                PyDict_Clear(interp->codec_search_cache);
            }
            return PyList_SetSlice(codec_search_path, i, i+1, NULL);
        }
    }
    return 0;
}
externint_Py_normalize_encoding(constchar*,char*,size_t);
/*ConvertastringtoanormalizedPythonstring(decodedfromUTF-8):allcharactersare
convertedtolowercase,spacesandhyphensarereplacedwithunderscores.*/
static
PyObject*normalizestring(constchar*string)
{
size_tlen=strlen(string);
char*encoding;
PyObject*v;
if(len>PY_SSIZE_T_MAX){
PyErr_SetString(PyExc_OverflowError,"stringistoolarge");
returnNULL;
}
encoding=PyMem_Malloc(len+1);
if(encoding==NULL)
returnPyErr_NoMemory();
if(!_Py_normalize_encoding(string,encoding,len+1))
{
PyErr_SetString(PyExc_RuntimeError,"_Py_normalize_encoding()failed");
PyMem_Free(encoding);
returnNULL;
}
v=PyUnicode_FromString(encoding);
PyMem_Free(encoding);
returnv;
}
/*Lookupthegivenencodingandreturnatupleprovidingthecodec
facilities.
Theencodingstringislookedupconvertedtoalllower-case
characters.Thismakesencodingslookedupthroughthismechanism
effectivelycase-insensitive.
Ifnocodecisfound,aLookupErrorissetandNULLreturned.
Assideeffect,thistriestoloadtheencodingspackage,ifnot
yetdone.Thisispartofthelazyloadstrategyfortheencodings
package.
*/
PyObject*_PyCodec_Lookup(constchar*encoding)
{
if(encoding==NULL){
PyErr_BadArgument();
returnNULL;
}
PyInterpreterState*interp=_PyInterpreterState_GET();
if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init()){
returnNULL;
}
/*ConverttheencodingtoanormalizedPythonstring:all
charactersareconvertedtolowercase,spacesandhyphensare
replacedwithunderscores.*/
PyObject*v=normalizestring(encoding);
if(v==NULL){
returnNULL;
}
PyUnicode_InternInPlace(&v);
/*First,trytolookupthenameintheregistrydictionary*/
PyObject*result=PyDict_GetItemWithError(interp->codec_search_cache,v);
if(result!=NULL){
Py_INCREF(result);
Py_DECREF(v);
returnresult;
}
elseif(PyErr_Occurred()){
gotoonError;
}
/*Next,scanthesearchfunctionsinorderofregistration*/
constPy_ssize_tlen=PyList_Size(interp->codec_search_path);
if(len<0)
gotoonError;
if(len==0){
PyErr_SetString(PyExc_LookupError,
"nocodecsearchfunctionsregistered:"
"can'tfindencoding");
gotoonError;
}
Py_ssize_ti;
for(i=0;icodec_search_path,i);
if(func==NULL)
gotoonError;
result=PyObject_CallOneArg(func,v);
if(result==NULL)
gotoonError;
if(result==Py_None){
Py_DECREF(result);
continue;
}
if(!PyTuple_Check(result)||PyTuple_GET_SIZE(result)!=4){
PyErr_SetString(PyExc_TypeError,
"codecsearchfunctionsmustreturn4-tuples");
Py_DECREF(result);
gotoonError;
}
break;
}
if(i==len){
/*XXXPerhapsweshouldcachemissestoo?*/
PyErr_Format(PyExc_LookupError,
"unknownencoding:%s",encoding);
gotoonError;
}
/*Cacheandreturntheresult*/
if(PyDict_SetItem(interp->codec_search_cache,v,result)<0){
Py_DECREF(result);
gotoonError;
}
Py_DECREF(v);
returnresult;
onError:
Py_DECREF(v);
returnNULL;
}
/*CodecregistryencodingcheckAPI.*/
intPyCodec_KnownEncoding(constchar*encoding)
{
PyObject*codecs;
codecs=_PyCodec_Lookup(encoding);
if(!codecs){
PyErr_Clear();
return0;
}
else{
Py_DECREF(codecs);
return1;
}
}
/* Build the argument tuple for a codec call: (object,) when errors is NULL,
   otherwise (object, errors) with errors converted to a str.

   Returns a new reference, or NULL on failure. */
static
PyObject *args_tuple(PyObject *object,
                     const char *errors)
{
    const int with_errors = (errors != NULL);
    PyObject *tuple = PyTuple_New(1 + with_errors);

    if (tuple == NULL) {
        return NULL;
    }
    /* PyTuple_SET_ITEM takes over a reference, so give it its own. */
    Py_INCREF(object);
    PyTuple_SET_ITEM(tuple, 0, object);
    if (!with_errors) {
        return tuple;
    }

    PyObject *errors_str = PyUnicode_FromString(errors);
    if (errors_str == NULL) {
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, 1, errors_str);
    return tuple;
}
/*Helperfunctiontogetacodecitem*/
static
PyObject*codec_getitem(constchar*encoding,intindex)
{
PyObject*codecs;
PyObject*v;
codecs=_PyCodec_Lookup(encoding);
if(codecs==NULL)
returnNULL;
v=PyTuple_GET_ITEM(codecs,index);
Py_DECREF(codecs);
Py_INCREF(v);
returnv;
}
/* Helper functions to create an incremental codec. */

/* Fetch the attribute `attrname` from codec_info and call it, passing
   errors as a single string argument when it is non-NULL and no arguments
   otherwise. Returns the call result (NULL on failure). */
static
PyObject *codec_makeincrementalcodec(PyObject *codec_info,
                                     const char *errors,
                                     const char *attrname)
{
    PyObject *factory = PyObject_GetAttrString(codec_info, attrname);
    if (factory == NULL) {
        return NULL;
    }

    PyObject *instance = (errors != NULL)
        ? PyObject_CallFunction(factory, "s", errors)
        : _PyObject_CallNoArgs(factory);
    Py_DECREF(factory);
    return instance;
}
static
PyObject*codec_getincrementalcodec(constchar*encoding,
constchar*errors,
constchar*attrname)
{
PyObject*codec_info,*ret;
codec_info=_PyCodec_Lookup(encoding);
if(codec_info==NULL)
returnNULL;
ret=codec_makeincrementalcodec(codec_info,errors,attrname);
Py_DECREF(codec_info);
returnret;
}
/* Helper function to create a stream codec.

   Looks up encoding, takes entry `index` of the codec tuple and calls it
   with (stream, errors), or with (stream) alone when errors is NULL.
   Returns the call result (NULL on failure). */
static
PyObject *codec_getstreamcodec(const char *encoding,
                               PyObject *stream,
                               const char *errors,
                               const int index)
{
    PyObject *info = _PyCodec_Lookup(encoding);
    if (info == NULL) {
        return NULL;
    }

    /* Borrowed from the tuple; info is kept alive until after the call. */
    PyObject *factory = PyTuple_GET_ITEM(info, index);
    PyObject *instance = (errors == NULL)
        ? PyObject_CallOneArg(factory, stream)
        : PyObject_CallFunction(factory, "Os", stream, errors);

    Py_DECREF(info);
    return instance;
}
/* Helpers to work with the result of _PyCodec_Lookup */

/* Create an incremental decoder from an already looked-up codec_info. */
PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
                                             const char *errors)
{
    static const char attrname[] = "incrementaldecoder";

    return codec_makeincrementalcodec(codec_info, errors, attrname);
}
/* Create an incremental encoder from an already looked-up codec_info. */
PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
                                             const char *errors)
{
    static const char attrname[] = "incrementalencoder";

    return codec_makeincrementalcodec(codec_info, errors, attrname);
}
/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount.
*/

/* Return entry 0 of the codec tuple registered for encoding. */
PyObject *PyCodec_Encoder(const char *encoding)
{
    const int slot = 0;

    return codec_getitem(encoding, slot);
}
/* Return entry 1 of the codec tuple registered for encoding. */
PyObject *PyCodec_Decoder(const char *encoding)
{
    const int slot = 1;

    return codec_getitem(encoding, slot);
}
/* Look up encoding and create an incremental encoder for it. */
PyObject *PyCodec_IncrementalEncoder(const char *encoding,
                                     const char *errors)
{
    static const char attrname[] = "incrementalencoder";

    return codec_getincrementalcodec(encoding, errors, attrname);
}
/* Look up encoding and create an incremental decoder for it. */
PyObject *PyCodec_IncrementalDecoder(const char *encoding,
                                     const char *errors)
{
    static const char attrname[] = "incrementaldecoder";

    return codec_getincrementalcodec(encoding, errors, attrname);
}
/* Call entry 2 of the codec tuple for encoding on stream (and errors). */
PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    const int slot = 2;

    return codec_getstreamcodec(encoding, stream, errors, slot);
}
/* Call entry 3 of the codec tuple for encoding on stream (and errors). */
PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    const int slot = 3;

    return codec_getstreamcodec(encoding, stream, errors, slot);
}
/* Helper that tries to ensure the reported exception chain indicates the
 * codec that was invoked to trigger the failure without changing the type
 * of the exception raised.
 *
 * operation names what was being done (callers pass "encoding" or
 * "decoding"); encoding is the codec name used in the message.
 */
static void
wrap_codec_error(const char *operation,
                 const char *encoding)
{
    /* TrySetFromCause will replace the active exception with a suitably
     * updated clone if it can, otherwise it will leave the original
     * exception alone.
     */
    _PyErr_TrySetFromCause("%s with '%s' codec failed",
                           operation, encoding);
}
/* Encode an object (e.g. a Unicode object) using the given encoding
   and return the resulting encoded object (usually a Python string).

   errors is passed to the encoder factory as argument if non-NULL.

   The reference to encoder is released on every path, success or failure.
   Returns a new reference to the first item of the encoder's result
   tuple, or NULL on failure. */
static PyObject *
_PyCodec_EncodeInternal(PyObject *object,
                        PyObject *encoder,
                        const char *encoding,
                        const char *errors)
{
    PyObject *args = NULL, *result = NULL;
    PyObject *v = NULL;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyObject_Call(encoder, args, NULL);
    if (result == NULL) {
        wrap_codec_error("encoding", encoding);
        goto onError;
    }

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object, integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result, 0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(result);
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    return NULL;
}
/* Decode an object (usually a Python string) using the given encoding
   and return an equivalent object (e.g. a Unicode object).

   errors is passed to the decoder factory as argument if non-NULL.

   The reference to decoder is released on every path, success or failure.
   Returns a new reference to the first item of the decoder's result
   tuple, or NULL on failure. */
static PyObject *
_PyCodec_DecodeInternal(PyObject *object,
                        PyObject *decoder,
                        const char *encoding,
                        const char *errors)
{
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyObject_Call(decoder, args, NULL);
    if (result == NULL) {
        wrap_codec_error("decoding", encoding);
        goto onError;
    }
    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result, 0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(decoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(decoder);
    Py_XDECREF(result);
    return NULL;
}
/* Generic encoding/decoding API */

/* Encode object with the encoder registered for encoding. The encoder
   reference obtained here is handed to _PyCodec_EncodeInternal(), which
   releases it. */
PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = PyCodec_Encoder(encoding);

    if (encoder != NULL) {
        return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
    }
    return NULL;
}
/* Decode object with the decoder registered for encoding. The decoder
   reference obtained here is handed to _PyCodec_DecodeInternal(), which
   releases it. */
PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decoder = PyCodec_Decoder(encoding);

    if (decoder != NULL) {
        return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
    }
    return NULL;
}
/*Textencoding/decodingAPI*/
PyObject*_PyCodec_LookupTextEncoding(constchar*encoding,
constchar*alternate_command)
{
PyObject*codec;
PyObject*attr;
intis_text_codec;
codec=_PyCodec_Lookup(encoding);
if(codec==NULL)
returnNULL;
/*Backwardscompatibility:assumeanyrawtupledescribesatext
*encoding,andthesameforanythinglackingtheprivate
*attribute.
*/
if(!PyTuple_CheckExact(codec)){
if(_PyObject_LookupAttr(codec,&_Py_ID(_is_text_encoding),&attr)<0){
Py_DECREF(codec);
returnNULL;
}
if(attr!=NULL){
is_text_codec=PyObject_IsTrue(attr);
Py_DECREF(attr);
if(is_text_codec<=0){
Py_DECREF(codec);
if(!is_text_codec)
PyErr_Format(PyExc_LookupError,
"'%.400s'isnotatextencoding;"
"use%stohandlearbitrarycodecs",
encoding,alternate_command);
returnNULL;
}
}
}
/*Thisappearstobeavalidtextencoding*/
returncodec;
}
static
PyObject*codec_getitem_checked(constchar*encoding,
constchar*alternate_command,
intindex)
{
PyObject*codec;
PyObject*v;
codec=_PyCodec_LookupTextEncoding(encoding,alternate_command);
if(codec==NULL)
returnNULL;
v=PyTuple_GET_ITEM(codec,index);
Py_INCREF(v);
Py_DECREF(codec);
returnv;
}
staticPyObject*_PyCodec_TextEncoder(constchar*encoding)
{
returncodec_getitem_checked(encoding,"codecs.encode()",0);
}
staticPyObject*_PyCodec_TextDecoder(constchar*encoding)
{
returncodec_getitem_checked(encoding,"codecs.decode()",1);
}
/* Encode object with a text encoding. The encoder reference obtained here
   is handed to _PyCodec_EncodeInternal(), which releases it. */
PyObject *_PyCodec_EncodeText(PyObject *object,
                              const char *encoding,
                              const char *errors)
{
    PyObject *encoder = _PyCodec_TextEncoder(encoding);

    if (encoder != NULL) {
        return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
    }
    return NULL;
}
/* Decode object with a text encoding. The decoder reference obtained here
   is handed to _PyCodec_DecodeInternal(), which releases it. */
PyObject *_PyCodec_DecodeText(PyObject *object,
                              const char *encoding,
                              const char *errors)
{
    PyObject *decoder = _PyCodec_TextDecoder(encoding);

    if (decoder != NULL) {
        return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
    }
    return NULL;
}
/*Registertheerrorhandlingcallbackfunctionerrorunderthename
name.Thisfunctionwillbecalledbythecodecwhenitencounters
anunencodablecharacters/undecodablebytesanddoesn'tknowthe
callbackname,whennameisspecifiedastheerrorparameter
inthecalltotheencode/decodefunction.
Return0onsuccess,-1onerror*/
intPyCodec_RegisterError(constchar*name,PyObject*error)
{
PyInterpreterState*interp=_PyInterpreterState_GET();
if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init())
return-1;
if(!PyCallable_Check(error)){
PyErr_SetString(PyExc_TypeError,"handlermustbecallable");
return-1;
}
returnPyDict_SetItemString(interp->codec_error_registry,
name,error);
}
/*Lookuptheerrorhandlingcallbackfunctionregisteredunderthe
nameerror.AsaspecialcaseNULLcanbepassed,inwhichcase
theerrorhandlingcallbackforstrictencodingwillbereturned.*/
PyObject*PyCodec_LookupError(constchar*name)
{
PyObject*handler=NULL;
PyInterpreterState*interp=_PyInterpreterState_GET();
if(interp->codec_search_path==NULL&&_PyCodecRegistry_Init())
returnNULL;
if(name==NULL)
name="strict";
handler=_PyDict_GetItemStringWithError(interp->codec_error_registry,name);
if(handler){
Py_INCREF(handler);
}
elseif(!PyErr_Occurred()){
PyErr_Format(PyExc_LookupError,"unknownerrorhandlername'%.400s'",name);
}
returnhandler;
}
/* Set a TypeError naming the type of exc, for error callbacks that were
   handed an exception they do not support. */
static void wrong_exception_type(PyObject *exc)
{
    PyErr_Format(PyExc_TypeError,
                 "don't know how to handle %.200s in error callback",
                 Py_TYPE(exc)->tp_name);
}
/* "strict" error callback: re-raise exc if it is an exception instance,
   otherwise raise TypeError. Always returns NULL. */
PyObject *PyCodec_StrictErrors(PyObject *exc)
{
    if (PyExceptionInstance_Check(exc))
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    else
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
    return NULL;
}
/* "ignore" error callback: return an (empty str, end) tuple, where end is
   read from the Unicode encode/decode/translate error exc. Any other
   exception type produces a TypeError. */
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    Py_ssize_t end;
    int failed;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        failed = PyUnicodeEncodeError_GetEnd(exc, &end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        failed = PyUnicodeDecodeError_GetEnd(exc, &end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        failed = PyUnicodeTranslateError_GetEnd(exc, &end);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    if (failed) {
        return NULL;
    }
    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}
/* "replace" error callback. Only the start of the UnicodeEncodeError branch
   is visible here: it reads start/end from exc and allocates a 1-byte-kind
   string of length end-start.
   NOTE(review): this span is damaged. Text is missing inside both `for`
   lines below, and the code after them uses names (ressize, ch, base,
   object, restuple) that have no visible declaration, so it presumably
   belongs to a different handler whose beginning was lost. Restore from the
   upstream file before editing. */
PyObject*PyCodec_ReplaceErrors(PyObject*exc)
{
Py_ssize_tstart,end,i,len;
if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){
PyObject*res;
Py_UCS1*outp;
if(PyUnicodeEncodeError_GetStart(exc,&start))
returnNULL;
if(PyUnicodeEncodeError_GetEnd(exc,&end))
returnNULL;
len=end-start;
res=PyUnicode_New(len,'?');
if(res==NULL)
returnNULL;
assert(PyUnicode_KIND(res)==PyUnicode_1BYTE_KIND);
outp=PyUnicode_1BYTE_DATA(res);
/* NOTE(review): loop condition and body are missing from the next line. */
for(i=0;iPY_SSIZE_T_MAX/(2+7+1))
/* Clamp end so that the range is at most PY_SSIZE_T_MAX/(2+7+1) long. */
end=start+PY_SSIZE_T_MAX/(2+7+1);
/* NOTE(review): loop condition and most of the body are missing below. */
for(i=start,ressize=0;i0){
/* Emit one decimal digit of ch, most significant first. */
*outp++='0'+ch/base;
ch%=base;
base/=10;
}
*outp++=';';
}
assert(_PyUnicode_CheckConsistency(res,1));
/* "N" hands the reference to res over to the result tuple. */
restuple=Py_BuildValue("(Nn)",res,end);
Py_DECREF(object);
returnrestuple;
}
else{
wrong_exception_type(exc);
returnNULL;
}
}
/* "backslashreplace" error callback.

   For a UnicodeDecodeError each offending byte becomes a 4-character \xNN
   escape. For a UnicodeEncodeError or UnicodeTranslateError each offending
   code point becomes \xNN, \uNNNN or \UNNNNNNNN depending on its value.
   Any other exception type produces a TypeError.

   Returns a new (replacement str, end) tuple, or NULL on failure. */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    Py_UCS1 *outp;
    /* Py_ssize_t, not int: the total may reach PY_SSIZE_T_MAX (see the
       clamp on end below), which does not fit in an int. */
    Py_ssize_t ressize;
    Py_UCS4 c;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        const unsigned char *p;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        p = (const unsigned char*)PyBytes_AS_STRING(object);
        res = PyUnicode_New(4 * (end - start), 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        for (i = start; i < end; i++, outp += 4) {
            unsigned char byte = p[i];
            outp[0] = '\\';
            outp[1] = 'x';
            outp[2] = Py_hexdigits[(byte>>4)&0xf];
            outp[3] = Py_hexdigits[byte&0xf];
        }

        assert(_PyUnicode_CheckConsistency(res, 1));
        Py_DECREF(object);
        return Py_BuildValue("(Nn)", res, end);
    }
    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeTranslateError_GetObject(exc)))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    /* Worst case is 1+1+8 output characters per input character; clamp the
       range so the total cannot exceed PY_SSIZE_T_MAX. */
    if (end - start > PY_SSIZE_T_MAX / (1+1+8))
        end = start + PY_SSIZE_T_MAX / (1+1+8);
    /* First pass: compute the size of the replacement string. */
    for (i = start, ressize = 0; i < end; ++i) {
        /* object is guaranteed to be "ready" */
        c = PyUnicode_READ_CHAR(object, i);
        if (c >= 0x10000) {
            ressize += 1+1+8;
        }
        else if (c >= 0x100) {
            ressize += 1+1+4;
        }
        else
            ressize += 1+1+2;
    }
    res = PyUnicode_New(ressize, 127);
    if (res == NULL) {
        Py_DECREF(object);
        return NULL;
    }
    outp = PyUnicode_1BYTE_DATA(res);
    /* Second pass: write the escapes. The last two hex digits are common
       to all three forms and are emitted after the if/else chain. */
    for (i = start; i < end; ++i) {
        c = PyUnicode_READ_CHAR(object, i);
        *outp++ = '\\';
        if (c >= 0x00010000) {
            *outp++ = 'U';
            *outp++ = Py_hexdigits[(c>>28)&0xf];
            *outp++ = Py_hexdigits[(c>>24)&0xf];
            *outp++ = Py_hexdigits[(c>>20)&0xf];
            *outp++ = Py_hexdigits[(c>>16)&0xf];
            *outp++ = Py_hexdigits[(c>>12)&0xf];
            *outp++ = Py_hexdigits[(c>>8)&0xf];
        }
        else if (c >= 0x100) {
            *outp++ = 'u';
            *outp++ = Py_hexdigits[(c>>12)&0xf];
            *outp++ = Py_hexdigits[(c>>8)&0xf];
        }
        else
            *outp++ = 'x';
        *outp++ = Py_hexdigits[(c>>4)&0xf];
        *outp++ = Py_hexdigits[c&0xf];
    }

    assert(_PyUnicode_CheckConsistency(res, 1));
    Py_DECREF(object);
    return Py_BuildValue("(Nn)", res, end);
}
static_PyUnicode_Name_CAPI*ucnhash_capi=NULL;
PyObject*PyCodec_NameReplaceErrors(PyObject*exc)
{
if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){
PyObject*restuple;
PyObject*object;
Py_ssize_ti;
Py_ssize_tstart;
Py_ssize_tend;
PyObject*res;
Py_UCS1*outp;
Py_ssize_tressize;
intreplsize;
Py_UCS4c;
charbuffer[256];/*NAME_MAXLEN*/
if(PyUnicodeEncodeError_GetStart(exc,&start))
returnNULL;
if(PyUnicodeEncodeError_GetEnd(exc,&end))
returnNULL;
if(!(object=PyUnicodeEncodeError_GetObject(exc)))
returnNULL;
if(!ucnhash_capi){
/*loadtheunicodedatamodule*/
ucnhash_capi=(_PyUnicode_Name_CAPI*)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME,1);
if(!ucnhash_capi){
returnNULL;
}
}
for(i=start,ressize=0;igetname(c,buffer,sizeof(buffer),1)){
replsize=1+1+1+(int)strlen(buffer)+1;
}
elseif(c>=0x10000){
replsize=1+1+8;
}
elseif(c>=0x100){
replsize=1+1+4;
}
else
replsize=1+1+2;
if(ressize>PY_SSIZE_T_MAX-replsize)
break;
ressize+=replsize;
}
end=i;
res=PyUnicode_New(ressize,127);
if(res==NULL)
returnNULL;
for(i=start,outp=PyUnicode_1BYTE_DATA(res);
igetname(c,buffer,sizeof(buffer),1)){
*outp++='N';
*outp++='{';
strcpy((char*)outp,buffer);
outp+=strlen(buffer);
*outp++='}';
continue;
}
if(c>=0x00010000){
*outp++='U';
*outp++=Py_hexdigits[(c>>28)&0xf];
*outp++=Py_hexdigits[(c>>24)&0xf];
*outp++=Py_hexdigits[(c>>20)&0xf];
*outp++=Py_hexdigits[(c>>16)&0xf];
*outp++=Py_hexdigits[(c>>12)&0xf];
*outp++=Py_hexdigits[(c>>8)&0xf];
}
elseif(c>=0x100){
*outp++='u';
*outp++=Py_hexdigits[(c>>12)&0xf];
*outp++=Py_hexdigits[(c>>8)&0xf];
}
else
*outp++='x';
*outp++=Py_hexdigits[(c>>4)&0xf];
*outp++=Py_hexdigits[c&0xf];
}
assert(outp==PyUnicode_1BYTE_DATA(res)+ressize);
assert(_PyUnicode_CheckConsistency(res,1));
restuple=Py_BuildValue("(Nn)",res,end);
Py_DECREF(object);
returnrestuple;
}
else{
wrong_exception_type(exc);
returnNULL;
}
}
/* Encoding codes returned by get_standard_encoding(). */
#define ENC_UNKNOWN     (-1)
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Classify an encoding name as one of the UTF encodings above.

   Accepts "utf" (any case), an optional '-' or '_', then "8", "16" or "32",
   the latter two optionally followed by an optional '-'/'_' and "be"/"le"
   (any case); without a suffix the native byte order is used. The exact
   name "CP_UTF8" is also accepted.

   On a match, *bytelength is set (3 for UTF-8, 2 for UTF-16, 4 for UTF-32)
   and the ENC_* code is returned. Otherwise ENC_UNKNOWN is returned;
   *bytelength may already have been written in that case. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        encoding += 3;
        if (*encoding == '-' || *encoding == '_' )
            encoding++;
        if (encoding[0] == '8' && encoding[1] == '\0') {
            *bytelength = 3;
            return ENC_UTF8;
        }
        else if (encoding[0] == '1' && encoding[1] == '6') {
            encoding += 2;
            *bytelength = 2;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF16BE;
#else
                return ENC_UTF16LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Check encoding[0] first: for a name ending in the separator
               (e.g. "utf-16-") encoding[0] is the terminator and
               encoding[1] would be past the end of the string. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF16BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF16LE;
            }
        }
        else if (encoding[0] == '3' && encoding[1] == '2') {
            encoding += 2;
            *bytelength = 4;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF32BE;
#else
                return ENC_UTF32LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Same guard as for UTF-16 above. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF32BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF32LE;
            }
        }
    }
    else if (strcmp(encoding, "CP_UTF8") == 0) {
        *bytelength = 3;
        return ENC_UTF8;
    }
    return ENC_UNKNOWN;
}
/* This handler is declared static until someone demonstrates
   a need to call it directly. */

/* "surrogatepass" error callback.

   Encoding: every character in [start, end) must be a surrogate and is
   written out in the UTF-8/16/32 form selected by the exception's encoding
   name; returns (bytes, end).
   Decoding: tries to decode a single surrogate at start and returns
   (str, start + bytelength).
   If the encoding is not one of the supported UTF encodings, or the data
   is not a surrogate, the original exception is re-raised. Any other
   exception type produces a TypeError. */
static PyObject *
PyCodec_SurrogatePassErrors(PyObject *exc)
{
    PyObject *restuple;
    PyObject *object;
    PyObject *encode;
    const char *encoding;
    int code;
    int bytelength;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        unsigned char *outp;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
            Py_DECREF(object);
            return NULL;
        }
        if (!(encoding = PyUnicode_AsUTF8(encode))) {
            Py_DECREF(object);
            Py_DECREF(encode);
            return NULL;
        }
        code = get_standard_encoding(encoding, &bytelength);
        Py_DECREF(encode);
        if (code == ENC_UNKNOWN) {
            /* Not supported, fail with original exception */
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            Py_DECREF(object);
            return NULL;
        }

        /* Clamp the range so the output size cannot overflow. */
        if (end - start > PY_SSIZE_T_MAX / bytelength)
            end = start + PY_SSIZE_T_MAX / bytelength;
        res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
        if (!res) {
            Py_DECREF(object);
            return NULL;
        }
        outp = (unsigned char*)PyBytes_AsString(res);
        for (i = start; i < end; i++) {
            /* object is guaranteed to be "ready" */
            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
            if (!Py_UNICODE_IS_SURROGATE(ch)) {
                /* Not a surrogate, fail with original exception */
                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
                Py_DECREF(res);
                Py_DECREF(object);
                return NULL;
            }
            switch (code) {
            case ENC_UTF8:
                *outp++ = (unsigned char)(0xe0 | (ch >> 12));
                *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
                *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
                break;
            case ENC_UTF16LE:
                *outp++ = (unsigned char) ch;
                *outp++ = (unsigned char)(ch >> 8);
                break;
            case ENC_UTF16BE:
                *outp++ = (unsigned char)(ch >> 8);
                *outp++ = (unsigned char) ch;
                break;
            case ENC_UTF32LE:
                *outp++ = (unsigned char) ch;
                *outp++ = (unsigned char)(ch >> 8);
                *outp++ = (unsigned char)(ch >> 16);
                *outp++ = (unsigned char)(ch >> 24);
                break;
            case ENC_UTF32BE:
                *outp++ = (unsigned char)(ch >> 24);
                *outp++ = (unsigned char)(ch >> 16);
                *outp++ = (unsigned char)(ch >> 8);
                *outp++ = (unsigned char) ch;
                break;
            }
        }
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        const unsigned char *p;
        Py_UCS4 ch = 0;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        p = (const unsigned char*)PyBytes_AS_STRING(object);
        if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
            Py_DECREF(object);
            return NULL;
        }
        if (!(encoding = PyUnicode_AsUTF8(encode))) {
            Py_DECREF(object);
            Py_DECREF(encode);
            return NULL;
        }
        code = get_standard_encoding(encoding, &bytelength);
        Py_DECREF(encode);
        if (code == ENC_UNKNOWN) {
            /* Not supported, fail with original exception */
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            Py_DECREF(object);
            return NULL;
        }

        /* Try decoding a single surrogate character. If
           there are more, let the codec call us again. */
        p += start;
        if (PyBytes_GET_SIZE(object) - start >= bytelength) {
            switch (code) {
            case ENC_UTF8:
                if ((p[0] & 0xf0) == 0xe0 &&
                    (p[1] & 0xc0) == 0x80 &&
                    (p[2] & 0xc0) == 0x80) {
                    /* it's a three-byte code */
                    ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
                }
                break;
            case ENC_UTF16LE:
                ch = p[1] << 8 | p[0];
                break;
            case ENC_UTF16BE:
                ch = p[0] << 8 | p[1];
                break;
            /* The bytes are widened to Py_UCS4 before shifting: a byte
               >= 0x80 promoted to int and shifted left by 24 would
               overflow a signed int. */
            case ENC_UTF32LE:
                ch = ((Py_UCS4)p[3] << 24) | ((Py_UCS4)p[2] << 16) |
                     ((Py_UCS4)p[1] << 8) | p[0];
                break;
            case ENC_UTF32BE:
                ch = ((Py_UCS4)p[0] << 24) | ((Py_UCS4)p[1] << 16) |
                     ((Py_UCS4)p[2] << 8) | p[3];
                break;
            }
        }

        Py_DECREF(object);
        if (!Py_UNICODE_IS_SURROGATE(ch)) {
            /* it's not a surrogate - fail */
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            return NULL;
        }
        res = PyUnicode_FromOrdinal(ch);
        if (res == NULL)
            return NULL;
        return Py_BuildValue("(Nn)", res, start + bytelength);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
/* "surrogateescape" error callback.
   Visible encode branch: allocates end-start bytes and writes ch-0xdc00 for
   each character, re-raising the original exception for a character outside
   the accepted range.
   NOTE(review): this span is damaged. Text is missing inside the `for`,
   `while` and final `for` lines below. The code after the `while` line
   assigns interp->codec_search_path/cache/error_registry, so it presumably
   is the tail of _PyCodecRegistry_Init() and everything between the two
   functions was lost. Restore from the upstream file before editing. */
staticPyObject*
PyCodec_SurrogateEscapeErrors(PyObject*exc)
{
PyObject*restuple;
PyObject*object;
Py_ssize_ti;
Py_ssize_tstart;
Py_ssize_tend;
PyObject*res;
if(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeEncodeError)){
char*outp;
if(PyUnicodeEncodeError_GetStart(exc,&start))
returnNULL;
if(PyUnicodeEncodeError_GetEnd(exc,&end))
returnNULL;
if(!(object=PyUnicodeEncodeError_GetObject(exc)))
returnNULL;
res=PyBytes_FromStringAndSize(NULL,end-start);
if(!res){
Py_DECREF(object);
returnNULL;
}
outp=PyBytes_AsString(res);
/* NOTE(review): loop condition and the start of the range test are
   missing from the next line; only the upper bound 0xdcff survives. */
for(i=start;i0xdcff){
/* Not a UTF-8b surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc),exc);
Py_DECREF(res);
Py_DECREF(object);
returnNULL;
}
*outp++=ch-0xdc00;
}
/* "O" adds its own reference to res, hence the DECREF that follows. */
restuple=Py_BuildValue("(On)",res,end);
Py_DECREF(res);
Py_DECREF(object);
returnrestuple;
}
elseif(PyObject_TypeCheck(exc,(PyTypeObject*)PyExc_UnicodeDecodeError)){
PyObject*str;
constunsignedchar*p;
Py_UCS2ch[4];/* decode up to 4 bad bytes. */
intconsumed=0;
if(PyUnicodeDecodeError_GetStart(exc,&start))
returnNULL;
if(PyUnicodeDecodeError_GetEnd(exc,&end))
returnNULL;
if(!(object=PyUnicodeDecodeError_GetObject(exc)))
returnNULL;
p=(constunsignedchar*)PyBytes_AS_STRING(object);
/* NOTE(review): the rest of the decode branch and the start of the
   following function(s) are missing from the next line. */
while(consumed<4&&consumedcodec_search_path!=NULL)
return0;
/* Create the three per-interpreter registry containers; any allocation
   failure returns -1. */
interp->codec_search_path=PyList_New(0);
if(interp->codec_search_path==NULL){
return-1;
}
interp->codec_search_cache=PyDict_New();
if(interp->codec_search_cache==NULL){
return-1;
}
interp->codec_error_registry=PyDict_New();
if(interp->codec_error_registry==NULL){
return-1;
}
/* NOTE(review): loop condition and body are missing from the next line. */
for(size_ti=0;icodecs_initialized=1;
return0;
}
Copylines
Copypermalink
Viewgitblame
Referenceinnewissue
Go
Youcan’tperformthatactionatthistime.
Yousignedinwithanothertaborwindow.Reloadtorefreshyoursession.
Yousignedoutinanothertaborwindow.Reloadtorefreshyoursession.