Project: opensauce-python Author: voicesauce File: textgrid.py License: ... If no encoding is specified, then the default of 'utf-8' will be returned.
Search by Module
Search by Words
Search Projects
Most Popular
Top Python APIs
Popular Projects
Java
Python
JavaScript
TypeScript
C++
Scala
Blog
reportthisad
reportthisad
reportthisad
Python detect encoding
60 Python code examples are found related to "detect encoding".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example1
Project:
tabulator-py
Author:frictionlessdata
File:
helpers.py
License:
MITLicense
7
votes
defdetect_encoding(sample,encoding=None):
"""Detectencodingofabytestringsample.
"""
#Toreducetabulatorimporttime
try:
fromcchardetimportdetect
exceptImportError:
fromchardetimportdetect
ifencodingisnotNone:
returnnormalize_encoding(sample,encoding)
result=detect(sample)
confidence=result['confidence']or0
encoding=result['encoding']or'ascii'
encoding=normalize_encoding(sample,encoding)
ifconfidence=(3,0):
returnopen(file_path,"r",encoding=detect_encoding(file_path).get('encoding','utf-8'))
else:
returnopen(file_path,"r")
Example4
Project:
OasisLMF
Author:OasisLMF
File:
data.py
License:
BSD3-Clause"New"or"Revised"License
6
votes
def detect_encoding(filepath):
    """
    Given a path to a CSV of unknown encoding,
    read lines to detect its encoding type.

    The file is opened in binary mode and fed line by line to a
    ``UniversalDetector`` until the detector reports it is done or the
    file is exhausted.

    :param filepath: Filepath to check
    :type filepath: str

    :return: Example `{'encoding': 'ISO-8859-1', 'confidence': 0.73, 'language': ''}`
    :rtype: dict
    """
    detector = UniversalDetector()
    with io.open(filepath, 'rb') as f:
        for line in f:
            detector.feed(line)
            # Stop reading as soon as the detector has made up its mind.
            if detector.done:
                break
    detector.close()
    return detector.result
Example5
Project:
Turing
Author:TuringApp
File:
file.py
License:
MITLicense
6
votes
def detect_encoding(self, path):
    """
    Return the source encoding declared by the file at ``path``.

    For the implementation of encoding definitions in Python, look at:
    - http://www.python.org/dev/peps/pep-0263/

    .. note:: code taken and adapted from
        ```jedi.common.source_to_unicode.detect_encoding```

    :param path: path of the file to inspect; it is read fully in binary
        mode.
    :return: ``'utf-8'`` when the file starts with the UTF-8 byte-order
        mark, otherwise the name found in a ``coding[=:]`` declaration
        within the first two lines, otherwise ``'UTF-8'``.
    """
    with open(path, 'rb') as file:
        source = file.read()
        # take care of line encodings (not in jedi)
        source = source.replace(b'\r', b'')
        # str() of a bytes object yields its repr, in which newlines show
        # up as the two characters "\n"; turn those back into newlines.
        source_str = str(source).replace('\\n', '\n')

    byte_mark = ast.literal_eval(r"b'\xef\xbb\xbf'")
    if source.startswith(byte_mark):
        # UTF-8 byte-order mark
        return 'utf-8'

    first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', source_str).group(0)
    possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
                                  first_two_lines)
    if possible_encoding:
        return possible_encoding.group(1)
    return 'UTF-8'
Example6
Project:
win-unicode-console
Author:Drekin
File:
tokenize_open.py
License:
MITLicense
6
votes
def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    # The extended variant returns extra items; only the first two are
    # part of this function's contract.
    return detect_encoding_ex(readline)[:2]
Example7
Project:
Wordless
Author:BLKSerene
File:
wl_detection.py
License:
GNUGeneralPublicLicensev3.0
5
votes
defdetect_encoding(main,file_path):
text=b''
success=True
withopen(file_path,'rb')asf:
ifmain.settings_custom['auto_detection']['detection_settings']['number_lines_no_limit']:
forlineinf:
text+=line
else:
fori,lineinenumerate(f):
ifithefilehasneverbeensaved
#AtthesametimefNamemayexist,i.e.anewfileoverwritestheexisting
#one.
ifos.path.isabs(fName)andos.path.exists(fName)and\
editor.encodingisnotNone:
returndetectExistingFileWriteEncoding(editor,fName)
returndetectNewFileWriteEncoding(editor,fName)
Example15
Project:
ingestors
Author:occrp-attic
File:
encoding.py
License:
MITLicense
5
votes
def detect_list_encoding(self, items, default=DEFAULT_ENCODING):
    """Detect one encoding for a sequence of byte strings.

    Every ``bytes`` item of ``items`` is fed to a chardet
    ``UniversalDetector``; items of any other type are skipped. Feeding
    stops early once the detector reports it is done.

    :param items: iterable whose ``bytes`` members are sampled.
    :param default: fallback handed to ``normalize_result``.
    :return: whatever ``normalize_result(detector.result, default)`` returns.
    """
    detector = chardet.UniversalDetector()
    for text in items:
        if not isinstance(text, bytes):
            continue
        detector.feed(text)
        if detector.done:
            break

    detector.close()
    return normalize_result(detector.result, default)
Example16
Project:
Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda
Author:PacktPublishing
File:
inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example17
Project:
nzb-subliminal
Author:caronc
File:
inputstream.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example18
Project:
recruit
Author:Frank-qlu
File:
_inputstream.py
License:
ApacheLicense2.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example19
Project:
anpr
Author:italia
File:
_inputstream.py
License:
CreativeCommonsAttribution4.0International
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example20
Project:
vnpy_crypto
Author:birforce
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example21
Project:
Hands-On-Application-Development-with-PyCharm
Author:PacktPublishing
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example22
Project:
V1EngineeringInc-Docs
Author:V1EngineeringInc
File:
_inputstream.py
License:
CreativeCommonsAttributionShareAlike4.0International
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example23
Project:
pykit
Author:bsc-s2
File:
__init__.py
License:
MITLicense
5
votes
def detect_encoding(b):
    """Guess which UTF codec encodes the byte string ``b``.

    A leading byte-order mark decides first (UTF-32, then UTF-16, then
    UTF-8). Without a BOM the position of zero bytes among the first
    four (or two) bytes is used to tell UTF-16/UTF-32 and their byte
    order apart.

    :param b: bytes-like object supporting ``startswith`` and indexing.
    :return: one of 'utf-32', 'utf-16', 'utf-8-sig', 'utf-16-be',
        'utf-32-be', 'utf-16-le', 'utf-32-le' or the default 'utf-8'.
    """
    bstartswith = b.startswith
    # The UTF-32 BOMs must be tested before the UTF-16 ones because the
    # little-endian UTF-32 BOM begins with the UTF-16 one.
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    if len(b) >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif len(b) == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'
Example24
Project:
CogAlg
Author:boris-kz
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example25
Project:
Splunking-Crime
Author:nccgroup
File:
inputstream.py
License:
GNUAfferoGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example26
Project:
codimension
Author:SergeySatskiy
File:
encoding.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectFileEncodingToRead(fName, text=None):
    """Detects the read encoding.

    The sources are consulted in order and the first one that yields a
    value wins: BOM, user assigned encoding, encoding declared in the
    content, project default, IDE setting, ``DEFAULT_ENCODING``.

    :param fName: file name; read from disk only when ``text`` is None.
    :param text: optional leading bytes of the file. When omitted, the
        first 1024 bytes of ``fName`` are read.
    :return: an encoding name; BOM matches are reported with a ``bom-``
        prefix.
    """
    if text is None:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

    # Step 1: check for BOM
    if text.startswith(BOM_UTF8):
        return 'bom-utf-8'
    if text.startswith(BOM_UTF16):
        return 'bom-utf-16'
    if text.startswith(BOM_UTF32):
        return 'bom-utf-32'

    # Step 2: check if it was a user assigned encoding
    userAssignedEncoding = getFileEncoding(fName)
    if userAssignedEncoding:
        return userAssignedEncoding

    # Step 3: extract encoding from the file
    encFromFile = getCodingFromBytes(text)
    if encFromFile:
        return encFromFile

    # Step 4: check the project default encoding
    project = GlobalData().project
    if project.isLoaded():
        projectEncoding = project.props['encoding']
        if projectEncoding:
            return projectEncoding

    # Step 5: checks the IDE encoding
    ideEncoding = Settings()['encoding']
    if ideEncoding:
        return ideEncoding

    # Step 6: default
    return DEFAULT_ENCODING
Example27
Project:
GraphicDesignPatternByPython
Author:Relph1119
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example28
Project:
pySINDy
Author:luckystarufo
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example29
Project:
elasticintel
Author:securityclippy
File:
console.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detect_console_encoding():
    """
    Try to find the most capable encoding supported by the console.
    slightly modified from the way IPython handles the same issue.

    The candidates are tried in order: the stdout/stdin encoding, the
    locale's preferred encoding, and finally ``sys.getdefaultencoding()``.
    A later candidate is consulted only while the current one is missing
    or contains "ascii".

    As a side effect the module-level ``_initial_defencoding`` is filled
    in on the first call where it is still falsy.
    """
    global _initial_defencoding

    encoding = None
    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except AttributeError:
        # A replaced stdout/stdin object may lack ``encoding``.
        pass

    # try again for something better
    if not encoding or 'ascii' in encoding.lower():
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if not encoding or 'ascii' in encoding.lower():
        encoding = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding
Example30
Project:
bazarr
Author:morpheus65535
File:
_inputstream.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example31
Project:
wfuzz
Author:xmendez
File:
utils.py
License:
GNUGeneralPublicLicensev2.0
5
votes
def detect_encoding(self):
    """Detect the encoding of ``self.file_des`` with chardet.

    Lines are read from ``self.file_des`` and fed to a
    ``UniversalDetector`` until it reports it is done. Every line that
    was consumed is also appended to ``self.cache``.

    :return: the detector's ``result`` attribute.
    """
    detector = UniversalDetector()
    detector.reset()

    for line in self.file_des:
        detector.feed(line)
        # Remember what was consumed while sniffing.
        self.cache.append(line)
        if detector.done:
            break

    detector.close()

    return detector.result
Example32
Project:
partridge
Author:remix
File:
utilities.py
License:
MITLicense
5
votes
def detect_encoding(f: BinaryIO, limit: int = 2500) -> str:
    """
    Return encoding of provided input stream.

    Most of the time it's unicode, but if we are unable to decode the input
    natively, use `chardet` to determine the encoding heuristically.

    :param f: binary stream, iterated line by line. It is rewound with
        ``seek(0)`` only before the chardet pass; on the UTF-8 path the
        stream position is left wherever the scan stopped.
    :param limit: line index after which scanning stops.
    :return: ``"utf-8"`` when every scanned line decodes as UTF-8,
        otherwise the ``"encoding"`` entry of the chardet result.
    """
    unicode_decodable = True

    for line_no, line in enumerate(f):
        try:
            line.decode("utf-8")
        except UnicodeDecodeError:
            unicode_decodable = False
            break

        if line_no > limit:
            break

    if unicode_decodable:
        return "utf-8"

    f.seek(0)
    u = UniversalDetector()
    for line_no, line in enumerate(f):
        u.feed(line)
        if u.done or line_no > limit:
            break

    u.close()
    return u.result["encoding"]
Example33
Project:
philter-ucsf
Author:BCHSI
File:
philter.py
License:
BSD3-Clause"New"or"Revised"License
5
votes
def detect_encoding(self, fp):
    """Detect the encoding of the file at path ``fp`` with chardet.

    :param fp: path of the file to inspect; opened in binary mode.
    :return: the ``result`` attribute of the ``UniversalDetector`` after
        it has been fed the file line by line.
    :raises Exception: when ``fp`` does not exist.
    """
    if not os.path.exists(fp):
        raise Exception("Filepath does not exist", fp)

    detector = UniversalDetector()
    with open(fp, "rb") as f:
        for line in f:
            detector.feed(line)
            if detector.done:
                break
        detector.close()
    return detector.result
Example34
Project:
stopstalk-deployment
Author:stopstalk
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example35
Project:
Ansible
Author:mrlesmithjr
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example36
Project:
datafari
Author:francelabs
File:
inputstream.py
License:
ApacheLicense2.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example37
Project:
hacktoberfest2018
Author:ambujraj
File:
_inputstream.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example38
Project:
Imogen
Author:CedricGuillemet
File:
__init__.py
License:
MITLicense
5
votes
def detect_encoding(b):
    """Guess which UTF codec encodes the byte string ``b``.

    A leading byte-order mark decides first (UTF-32, then UTF-16, then
    UTF-8). Without a BOM the position of zero bytes among the first
    four (or two) bytes is used to tell UTF-16/UTF-32 and their byte
    order apart.

    :param b: bytes-like object supporting ``startswith`` and indexing.
    :return: one of 'utf-32', 'utf-16', 'utf-8-sig', 'utf-16-be',
        'utf-32-be', 'utf-16-le', 'utf-32-le' or the default 'utf-8'.
    """
    bstartswith = b.startswith
    # The UTF-32 BOMs must be tested before the UTF-16 ones because the
    # little-endian UTF-32 BOM begins with the UTF-16 one.
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    if len(b) >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif len(b) == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'
Example39
Project:
trafilatura
Author:adbar
File:
utils.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detect_encoding(bytesobject):
    """Read the first chunk of input and return its encoding.

    :param bytesobject: bytes to inspect.
    :return: ``'UTF-8'`` when ``isutf8`` accepts the input; otherwise the
        ``'encoding'`` entry of the ``cchardet`` guess when that module is
        available; otherwise ``None``.
    """
    # unicode-test
    if isutf8(bytesobject):
        return 'UTF-8'
    # try one of the installed detectors
    if cchardet is not None:
        guess = cchardet.detect(bytesobject)
        LOGGER.debug('guessed encoding: %s', guess['encoding'])
        return guess['encoding']
    # fallback on full response
    # if guess is None or guess['encoding'] is None: # or guess['confidence'] < 0.99:
    #     guessed_encoding = chardet.detect(bytesobject)['encoding']
    # return
    return None
Example40
Project:
android_universal
Author:bkerler
File:
__init__.py
License:
MITLicense
5
votes
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
    accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
    or little endian. Some editors or libraries may prepend a BOM.

    Only the first four bytes are examined.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name
    """
    head = data[:4]

    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'

    # Without any NUL byte in the head nothing points to UTF-16/32.
    if b'\x00' not in head:
        return 'utf-8'

    if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'

    if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'

    if len(head) == 4:
        if head[:3] == b'\x00\x00\x00':
            return 'utf-32-be'

        if head[::2] == b'\x00\x00':
            return 'utf-16-be'

        if head[1:] == b'\x00\x00\x00':
            return 'utf-32-le'

        if head[1::2] == b'\x00\x00':
            return 'utf-16-le'

    if len(head) == 2:
        return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'

    return 'utf-8'
Example41
Project:
talon
Author:mailgun
File:
utils.py
License:
ApacheLicense2.0
5
votes
def detect_encoding(string):
    """
    Tries to detect the encoding of the passed string.

    Defaults to UTF-8.

    :param string: the bytes to inspect; anything else trips the assert.
    :return: the encoding reported by ``chardet.detect``, or ``'utf-8'``
        when nothing was detected or detection raised.
    """
    assert isinstance(string, bytes)
    try:
        detected = chardet.detect(string)
        if detected:
            return detected.get('encoding') or 'utf-8'
    except Exception:
        # Deliberate best effort: any detector failure falls through to
        # the UTF-8 default below.
        pass
    return 'utf-8'
Example42
Project:
Mastering-Elasticsearch-7.0
Author:PacktPublishing
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example43
Project:
android_universal
Author:bkerler
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example44
Project:
pipenv
Author:pypa
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example45
Project:
OpenTrader
Author:OpenTrading
File:
tabview.py
License:
GNULesserGeneralPublicLicensev3.0
5
votes
def detect_encoding(data=None):
    """Return the default system encoding. If data is passed, try
    to decode the data with the default system encoding or from a short
    list of encoding types to test.

    Args:
        data - iterable of byte strings; each item must offer ``decode``.
            NOTE(review): the original text says "list of lists" --
            confirm the element type against the caller.

    Returns:
        enc - the system encoding when ``data`` is None, otherwise the
            first candidate that decodes every item. When no candidate
            fits, a message is printed and ``None`` is returned
            implicitly.
    """
    enc_list = ['utf-8', 'latin-1', 'iso8859-1', 'iso8859-2',
                'utf-16', 'cp720']
    code = locale.getpreferredencoding(False)
    if data is None:
        return code
    # Try the system encoding first when it is not already a candidate.
    if code.lower() not in enc_list:
        enc_list.insert(0, code.lower())
    for c in enc_list:
        try:
            for line in data:
                line.decode(c)
        except (UnicodeDecodeError, UnicodeError, AttributeError):
            continue
        return c
    print("Encoding not detected. Please pass encoding value manually")
Example46
Project:
python-netsurv
Author:sofia-netsurv
File:
autopep8.py
License:
MITLicense
5
votes
def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding.

    The encoding is taken from lib2to3's tokenizer and then verified by
    reading the file back with it.

    :param filename: path of the file to inspect.
    :param limit_byte_check: size passed to ``read`` during the
        verification read; the default -1 reads the whole file.
    :return: the detected encoding, or ``'latin-1'`` when detection or
        the verification read raises LookupError, SyntaxError or
        UnicodeDecodeError.
    """
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding
        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
Example47
Project:
jbox
Author:jpush
File:
inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example48
Project:
ru
Author:seppius-xbmc-repo
File:
inputstream.py
License:
GNUGeneralPublicLicensev2.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example49
Project:
PyDev.Debugger
Author:fabioz
File:
autopep8.py
License:
EclipsePublicLicense1.0
5
votes
def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding.

    The encoding is taken from lib2to3's tokenizer and then verified by
    reading the file back with it.

    :param filename: path of the file to inspect.
    :param limit_byte_check: size passed to ``read`` during the
        verification read; the default -1 reads the whole file.
    :return: the detected encoding, or ``'latin-1'`` when detection or
        the verification read raises LookupError, SyntaxError or
        UnicodeDecodeError.
    """
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding
        with open_with_encoding(filename, encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
Example50
Project:
rssant
Author:anyant
File:
response_builder.py
License:
BSD3-Clause"New"or"Revised"License
5
votes
def detect_content_encoding(content: bytes, http_encoding: str = None):
    """
    Pick an encoding for ``content``, trying several sources in order.

    The HTTP-declared encoding (when given), then the JSON, XML and
    chardet detectors are each passed through ``EncodingChecker.check``;
    the first non-None result is returned, with 'utf-8' as the default.

    >>> detect_content_encoding(b'hello', 'text/xml;charset=utf-8')
    'utf-8'
    >>> detect_content_encoding(b'hello', 'text/xml;charset=unknown')
    'utf-8'
    >>> content = ''.encode('utf-8')
    >>> detect_content_encoding(content)
    'utf-8'
    >>> detect_content_encoding("你好".encode('utf-8'))
    'utf-8'
    """
    content = bytes(content[:2000])  # only need peek partial content
    checker = EncodingChecker(content)
    if http_encoding:
        encoding = checker.check(http_encoding)
        if encoding is not None:
            return encoding
    encoding = checker.check(_detect_json_encoding(content))
    if encoding is not None:
        return encoding
    encoding = checker.check(_detect_xml_encoding(content))
    if encoding is not None:
        return encoding
    encoding = checker.check(_detect_chardet_encoding(content))
    if encoding is not None:
        return encoding
    return 'utf-8'
Example51
Project:
PhonePi_SampleServer
Author:priyankark
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example52
Project:
twitter-stock-recommendation
Author:alvarobartt
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example53
Project:
nzb-subliminal
Author:caronc
File:
inputstream.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectEncoding(self, parseMeta=True, chardet=True):
    """Determine the stream encoding and how sure that determination is.

    Sources are tried in order: byte-order mark, meta element
    declaration, chardet (when importable), ``self.defaultEncoding``.

    :param parseMeta: when true, consult ``self.detectEncodingMeta()`` if
        no BOM was found.
    :param chardet: when true, fall back to chardet if nothing was found.
    :return: ``(encoding, confidence)`` where confidence is "certain"
        only when the BOM decided, otherwise "tentative". An
        "iso-8859-1" result is reported as "windows-1252".
    """
    # First look for a BOM
    # This will also read past the BOM if present
    encoding = self.detectBOM()
    confidence = "certain"
    # If there is no BOM need to look for meta elements with encoding
    # information
    if encoding is None and parseMeta:
        encoding = self.detectEncodingMeta()
        confidence = "tentative"
    # Guess with chardet, if available
    if encoding is None and chardet:
        confidence = "tentative"
        try:
            from chardet.universaldetector import UniversalDetector
            detector = UniversalDetector()
            while not detector.done:
                buffer = self.rawStream.read(self.numBytesChardet)
                if not buffer:
                    break
                detector.feed(buffer)
            detector.close()
            encoding = detector.result['encoding']
            self.rawStream.seek(0)
        except ImportError:
            # chardet is optional; without it, use the default below.
            pass
    # If all else fails use the default encoding
    if encoding is None:
        confidence = "tentative"
        encoding = self.defaultEncoding

    # Substitute for equivalent encodings:
    encodingSub = {"iso-8859-1": "windows-1252"}

    if encoding.lower() in encodingSub:
        encoding = encodingSub[encoding.lower()]

    return encoding, confidence
Example54
Project:
Python24
Author:HaoZhang95
File:
_inputstream.py
License:
MITLicense
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example55
Project:
Computable
Author:ktraunmueller
File:
format.py
License:
MITLicense
5
votes
def detect_console_encoding():
    """
    Try to find the most capable encoding supported by the console.
    slightly modified from the way IPython handles the same issue.

    The candidates are tried in order: the stdout/stdin encoding, the
    locale's preferred encoding, and finally ``sys.getdefaultencoding()``.
    A later candidate is consulted only while the current one is missing
    or contains "ascii".

    As a side effect the module-level ``_initial_defencoding`` is filled
    in on the first call where it is still falsy.
    """
    import locale
    global _initial_defencoding

    encoding = None
    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except AttributeError:
        # A replaced stdout/stdin object may lack ``encoding``.
        pass

    # try again for something better
    if not encoding or 'ascii' in encoding.lower():
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if not encoding or 'ascii' in encoding.lower():
        encoding = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding
Example56
Project:
fxxkpython
Author:wistbean
File:
_inputstream.py
License:
GNUGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()`` (may be ``None``), except
        that an encoding named "utf-16be" or "utf-16le" is replaced by
        ``lookupEncoding("utf-8")``.
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
        encoding = lookupEncoding("utf-8")

    return encoding
Example57
Project:
vnpy_crypto
Author:birforce
File:
console.py
License:
MITLicense
5
votes
def detect_console_encoding():
    """
    Try to find the most capable encoding supported by the console.
    slightly modified from the way IPython handles the same issue.

    The candidates are tried in order: the stdout/stdin encoding, the
    locale's preferred encoding, and finally ``sys.getdefaultencoding()``.
    A later candidate is consulted only while the current one is missing
    or contains "ascii".

    As a side effect the module-level ``_initial_defencoding`` is filled
    in on the first call where it is still falsy.
    """
    global _initial_defencoding

    encoding = None
    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except AttributeError:
        # A replaced stdout/stdin object may lack ``encoding``.
        pass

    # try again for something better
    if not encoding or 'ascii' in encoding.lower():
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if not encoding or 'ascii' in encoding.lower():
        encoding = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding
Example58
Project:
Offensive-Security-Certified-Professional
Author:StevenDias33
File:
padding-oracle-tests.py
License:
MITLicense
5
votes
def detect_encoding(self):
    """Classify how ``self.data`` is encoded.

    If the data contains a percent-escape it is URL-decoded first and
    ``self.urlencoded`` is set. The (possibly decoded) data is then
    matched, in order, against hex, URL-safe Base64 and standard Base64.

    :return: ``self.HEXENC``, ``self.B64URL``, ``self.B64STD`` or, when
        nothing matches, ``self.NONE``.
    """
    # Raw strings keep the regex escapes (\-, \+, \/) from being read as
    # invalid string escapes.
    b64url = r'^[a-zA-Z0-9_\-]+={0,2}$'
    b64std = r'^[a-zA-Z0-9\+\/]+={0,2}$'
    hexenc1 = '^[0-9a-f]+$'
    hexenc2 = '^[0-9A-F]+$'

    data = self.data
    if re.search('%[0-9a-f]{2}', self.data, re.I) is not None:
        dbg('Sample is url-encoded.')
        # NOTE(review): urllib.unquote_plus exists only on Python 2; on
        # Python 3 it lives in urllib.parse -- confirm the target version.
        data = urllib.unquote_plus(data)
        self.urlencoded = True

    # Hex must be all lower or all upper case and have an even length.
    if (re.match(hexenc1, data) or re.match(hexenc2, data)) and len(data) % 2 == 0:
        dbg('Hex encoding detected.')
        return self.HEXENC

    if re.match(b64url, data):
        dbg('Base64url encoding detected.')
        return self.B64URL

    if re.match(b64std, data):
        dbg('Standard Base64 encoding detected.')
        return self.B64STD

    error('Warning: Could not detect data encoding. Going with plain data.')
    return self.NONE
Example59
Project:
MARA_Framework
Author:xtiankisutsa
File:
inputstream.py
License:
GNULesserGeneralPublicLicensev3.0
5
votes
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` bytes from ``self.rawStream``, runs
    ``EncodingParser`` over them and rewinds the stream to offset 0.

    Returns:
        The value of ``parser.getEncoding()``, except that "utf-16",
        "utf-16-be" and "utf-16-le" are replaced by "utf-8".
    """
    buffer = self.rawStream.read(self.numBytesMeta)
    assert isinstance(buffer, bytes)
    parser = EncodingParser(buffer)
    # Rewind the raw stream to offset 0.
    self.rawStream.seek(0)
    encoding = parser.getEncoding()

    # NOTE(review): presumably a UTF-16 declaration found while scanning
    # raw bytes cannot be right -- confirm against the HTML spec.
    if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
        encoding = "utf-8"

    return encoding
Example60
Project:
filmkodi
Author:mrknow
File:
autopep8.py
License:
ApacheLicense2.0
5
votes
def detect_encoding(filename):
    """Return file encoding.

    The encoding is taken from lib2to3's tokenizer and then verified by
    reading the whole file back with it.

    :param filename: path of the file to inspect.
    :return: the detected encoding, or ``'latin-1'`` when detection or
        the verification read raises LookupError, SyntaxError or
        UnicodeDecodeError.
    """
    try:
        with open(filename, 'rb') as input_file:
            check_lib2to3()
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding
        with open_with_encoding(filename, encoding) as test_file:
            test_file.read()

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
reportthisad