Python detect encoding - ProgramCreek.com

文章推薦指數: 80 %
投票人數:10人

Project: opensauce-python Author: voicesauce File: textgrid.py License: ... If no encoding is specified, then the default of 'utf-8' will be returned. SearchbyModule SearchbyWords SearchProjects MostPopular TopPythonAPIs PopularProjects Java Python JavaScript TypeScript C++ Scala Blog reportthisad reportthisad reportthisad Pythondetectencoding 60Pythoncodeexamplesarefoundrelatedto" detectencoding". Youcanvoteuptheonesyoulikeorvotedowntheonesyoudon'tlike, andgototheoriginalprojectorsourcefilebyfollowingthelinksaboveeachexample. Example1 Project: tabulator-py   Author:frictionlessdata   File: helpers.py   License: MITLicense 7 votes defdetect_encoding(sample,encoding=None): """Detectencodingofabytestringsample. """ #Toreducetabulatorimporttime try: fromcchardetimportdetect exceptImportError: fromchardetimportdetect ifencodingisnotNone: returnnormalize_encoding(sample,encoding) result=detect(sample) confidence=result['confidence']or0 encoding=result['encoding']or'ascii' encoding=normalize_encoding(sample,encoding) ifconfidence=(3,0): returnopen(file_path,"r",encoding=detect_encoding(file_path).get('encoding','utf-8')) else: returnopen(file_path,"r") Example4 Project: OasisLMF   Author:OasisLMF   File: data.py   License: BSD3-Clause"New"or"Revised"License 6 votes defdetect_encoding(filepath): """ GivenapathtoaCSVofunknownencoding readlinestodetectsitsencodingtype :paramfilepath:Filepathtocheck :typefilepath:str :return:Example`{'encoding':'ISO-8859-1','confidence':0.73,'language':''}` :rtype:dict """ detector=UniversalDetector() withio.open(filepath,'rb')asf: forlineinf: detector.feed(line) ifdetector.done: break detector.close() returndetector.result Example5 Project: Turing   Author:TuringApp   File: file.py   License: MITLicense 6 votes defdetect_encoding(self,path): """ FortheimplementationofencodingdefinitionsinPython,lookat: -http://www.python.org/dev/peps/pep-0263/ ..note::codetakenandadaptedfrom ```jedi.common.source_to_unicode.detect_encoding``` """ withopen(path,'rb')asfile: source=file.read() #takecareoflineencodings(notinjedi) source=source.replace(b'\r',b'') source_str=str(source).replace('\\n','\n') byte_mark=ast.literal_eval(r"b'\xef\xbb\xbf'") ifsource.startswith(byte_mark): #UTF-8byte-ordermark return'utf-8' first_two_lines=re.match(r'(?:[^\n]*\n){0,2}',source_str).group(0) possible_encoding=re.search(r"coding[=:]\s*([-\w.]+)", first_two_lines) ifpossible_encoding: returnpossible_encoding.group(1) return'UTF-8' Example6 Project: win-unicode-console   Author:Drekin   File: tokenize_open.py   License: MITLicense 6 votes defdetect_encoding(readline): """ Thedetect_encoding()functionisusedtodetecttheencodingthatshould beusedtodecodeaPythonsourcefile.Itrequiresoneargument,readline, inthesamewayasthetokenize()generator. Itwillcallreadlineamaximumoftwice,andreturntheencodingused (asastring)andalistofanylines(leftasbytes)ithasreadin. Itdetectstheencodingfromthepresenceofautf-8bomoranencoding cookieasspecifiedinpep-0263.Ifbothabomandacookiearepresent, butdisagree,aSyntaxErrorwillberaised.Iftheencodingcookieisan invalidcharset,raiseaSyntaxError.Notethatifautf-8bomisfound, 'utf-8-sig'isreturned. Ifnoencodingisspecified,thenthedefaultof'utf-8'willbereturned. """ returndetect_encoding_ex(readline)[:2] Example7 Project: Wordless   Author:BLKSerene   File: wl_detection.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetect_encoding(main,file_path): text=b'' success=True withopen(file_path,'rb')asf: ifmain.settings_custom['auto_detection']['detection_settings']['number_lines_no_limit']: forlineinf: text+=line else: fori,lineinenumerate(f): ifithefilehasneverbeensaved #AtthesametimefNamemayexist,i.e.anewfileoverwritestheexisting #one. ifos.path.isabs(fName)andos.path.exists(fName)and\ editor.encodingisnotNone: returndetectExistingFileWriteEncoding(editor,fName) returndetectNewFileWriteEncoding(editor,fName) Example15 Project: ingestors   Author:occrp-attic   File: encoding.py   License: MITLicense 5 votes defdetect_list_encoding(self,items,default=DEFAULT_ENCODING): detector=chardet.UniversalDetector() fortextinitems: ifnotisinstance(text,bytes): continue detector.feed(text) ifdetector.done: break detector.close() returnnormalize_result(detector.result,default) Example16 Project: Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda   Author:PacktPublishing   File: inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example17 Project: nzb-subliminal   Author:caronc   File: inputstream.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example18 Project: recruit   Author:Frank-qlu   File: _inputstream.py   License: ApacheLicense2.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example19 Project: anpr   Author:italia   File: _inputstream.py   License: CreativeCommonsAttribution4.0International 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example20 Project: vnpy_crypto   Author:birforce   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example21 Project: Hands-On-Application-Development-with-PyCharm   Author:PacktPublishing   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example22 Project: V1EngineeringInc-Docs   Author:V1EngineeringInc   File: _inputstream.py   License: CreativeCommonsAttributionShareAlike4.0International 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example23 Project: pykit   Author:bsc-s2   File: __init__.py   License: MITLicense 5 votes defdetect_encoding(b): bstartswith=b.startswith ifbstartswith((codecs.BOM_UTF32_BE,codecs.BOM_UTF32_LE)): return'utf-32' ifbstartswith((codecs.BOM_UTF16_BE,codecs.BOM_UTF16_LE)): return'utf-16' ifbstartswith(codecs.BOM_UTF8): return'utf-8-sig' iflen(b)>=4: ifnotb[0]: #0000-----utf-32-be #00XX-----utf-16-be return'utf-16-be'ifb[1]else'utf-32-be' ifnotb[1]: #XX000000-utf-32-le #XX0000XX-utf-16-le #XX00XX---utf-16-le return'utf-16-le'ifb[2]orb[3]else'utf-32-le' eliflen(b)==2: ifnotb[0]: #00XX-utf-16-be return'utf-16-be' ifnotb[1]: #XX00-utf-16-le return'utf-16-le' #default return'utf-8' Example24 Project: CogAlg   Author:boris-kz   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example25 Project: Splunking-Crime   Author:nccgroup   File: inputstream.py   License: GNUAfferoGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example26 Project: codimension   Author:SergeySatskiy   File: encoding.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectFileEncodingToRead(fName,text=None): """Detectsthereadencoding""" iftextisNone: withopen(fName,'rb')asdiskfile: text=diskfile.read(1024) #Step1:checkforBOM iftext.startswith(BOM_UTF8): return'bom-utf-8' iftext.startswith(BOM_UTF16): return'bom-utf-16' iftext.startswith(BOM_UTF32): return'bom-utf-32' #Checkifitwasauserassignedencoding userAssignedEncoding=getFileEncoding(fName) ifuserAssignedEncoding: returnuserAssignedEncoding #Step3:extractencodingfromthefile encFromFile=getCodingFromBytes(text) ifencFromFile: returnencFromFile #Step4:checktheprojectdefaultencoding project=GlobalData().project ifproject.isLoaded(): projectEncoding=project.props['encoding'] ifprojectEncoding: returnprojectEncoding #Step5:checkstheIDEencoding ideEncoding=Settings()['encoding'] ifideEncoding: returnideEncoding #Step6:default returnDEFAULT_ENCODING Example27 Project: GraphicDesignPatternByPython   Author:Relph1119   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example28 Project: pySINDy   Author:luckystarufo   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example29 Project: elasticintel   Author:securityclippy   File: console.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetect_console_encoding(): """ Trytofindthemostcapableencodingsupportedbytheconsole. slighlymodifiedfromthewayIPythonhandlesthesameissue. """ global_initial_defencoding encoding=None try: encoding=sys.stdout.encodingorsys.stdin.encoding exceptAttributeError: pass #tryagainforsomethingbetter ifnotencodingor'ascii'inencoding.lower(): try: encoding=locale.getpreferredencoding() exceptException: pass #whenallelsefails.thiswillusuallybe"ascii" ifnotencodingor'ascii'inencoding.lower(): encoding=sys.getdefaultencoding() #GH3360,savethereporteddefencodingatimporttime #MPLbackendsmaychangeit.Makeavailablefordebugging. ifnot_initial_defencoding: _initial_defencoding=sys.getdefaultencoding() returnencoding Example30 Project: bazarr   Author:morpheus65535   File: _inputstream.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example31 Project: wfuzz   Author:xmendez   File: utils.py   License: GNUGeneralPublicLicensev2.0 5 votes defdetect_encoding(self): detector=UniversalDetector() detector.reset() forlineinself.file_des: detector.feed(line) self.cache.append(line) ifdetector.done: break detector.close() returndetector.result Example32 Project: partridge   Author:remix   File: utilities.py   License: MITLicense 5 votes defdetect_encoding(f:BinaryIO,limit:int=2500)->str: """ Returnencodingofprovidedinputstream. Mostofthetimeit'sunicode,butifweareunabletodecodetheinput natively,use`chardet`todeterminetheencodingheuristically. """ unicode_decodable=True forline_no,lineinenumerate(f): try: line.decode("utf-8") exceptUnicodeDecodeError: unicode_decodable=False break ifline_no>limit: break ifunicode_decodable: return"utf-8" f.seek(0) u=UniversalDetector() forline_no,lineinenumerate(f): u.feed(line) ifu.doneorline_no>limit: break u.close() returnu.result["encoding"] Example33 Project: philter-ucsf   Author:BCHSI   File: philter.py   License: BSD3-Clause"New"or"Revised"License 5 votes defdetect_encoding(self,fp): ifnotos.path.exists(fp): raiseException("Filepathdoesnotexist",fp) detector=UniversalDetector() withopen(fp,"rb")asf: forlineinf: detector.feed(line) ifdetector.done: break detector.close() returndetector.result Example34 Project: stopstalk-deployment   Author:stopstalk   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example35 Project: Ansible   Author:mrlesmithjr   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example36 Project: datafari   Author:francelabs   File: inputstream.py   License: ApacheLicense2.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example37 Project: hacktoberfest2018   Author:ambujraj   File: _inputstream.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example38 Project: Imogen   Author:CedricGuillemet   File: __init__.py   License: MITLicense 5 votes defdetect_encoding(b): bstartswith=b.startswith ifbstartswith((codecs.BOM_UTF32_BE,codecs.BOM_UTF32_LE)): return'utf-32' ifbstartswith((codecs.BOM_UTF16_BE,codecs.BOM_UTF16_LE)): return'utf-16' ifbstartswith(codecs.BOM_UTF8): return'utf-8-sig' iflen(b)>=4: ifnotb[0]: #0000-----utf-32-be #00XX-----utf-16-be return'utf-16-be'ifb[1]else'utf-32-be' ifnotb[1]: #XX000000-utf-32-le #XX0000XX-utf-16-le #XX00XX---utf-16-le return'utf-16-le'ifb[2]orb[3]else'utf-32-le' eliflen(b)==2: ifnotb[0]: #00XX-utf-16-be return'utf-16-be' ifnotb[1]: #XX00-utf-16-le return'utf-16-le' #default return'utf-8' Example39 Project: trafilatura   Author:adbar   File: utils.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetect_encoding(bytesobject): """Readthefirstchunkofinputandreturnitsencoding""" #unicode-test ifisutf8(bytesobject): return'UTF-8' #tryoneoftheinstalleddetectors ifcchardetisnotNone: guess=cchardet.detect(bytesobject) LOGGER.debug('guessedencoding:%s',guess['encoding']) returnguess['encoding'] #fallbackonfullresponse #ifguessisNoneorguess['encoding']isNone:#orguess['confidence']<0.99: #guessed_encoding=chardet.detect(bytesobject)['encoding'] #return returnNone Example40 Project: android_universal   Author:bkerler   File: __init__.py   License: MITLicense 5 votes defdetect_encoding(data): """DetectwhichUTFcodecwasusedtoencodethegivenbytes. ThelatestJSONstandard(:rfc:`8259`)suggeststhatonlyUTF-8is accepted.Olderdocumentsallowed8,16,or32.16and32canbebig orlittleendian.SomeeditorsorlibrariesmayprependaBOM. :paramdata:BytesinunknownUTFencoding. :return:UTFencodingname """ head=data[:4] ifhead[:3]==codecs.BOM_UTF8: return'utf-8-sig' ifb'\x00'notinhead: return'utf-8' ifheadin(codecs.BOM_UTF32_BE,codecs.BOM_UTF32_LE): return'utf-32' ifhead[:2]in(codecs.BOM_UTF16_BE,codecs.BOM_UTF16_LE): return'utf-16' iflen(head)==4: ifhead[:3]==b'\x00\x00\x00': return'utf-32-be' ifhead[::2]==b'\x00\x00': return'utf-16-be' ifhead[1:]==b'\x00\x00\x00': return'utf-32-le' ifhead[1::2]==b'\x00\x00': return'utf-16-le' iflen(head)==2: return'utf-16-be'ifhead.startswith(b'\x00')else'utf-16-le' return'utf-8' Example41 Project: talon   Author:mailgun   File: utils.py   License: ApacheLicense2.0 5 votes defdetect_encoding(string): """ Triestodetecttheencodingofthepassedstring. DefaultstoUTF-8. """ assertisinstance(string,bytes) try: detected=chardet.detect(string) ifdetected: returndetected.get('encoding')or'utf-8' exceptExceptionase: pass return'utf-8' Example42 Project: Mastering-Elasticsearch-7.0   Author:PacktPublishing   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example43 Project: android_universal   Author:bkerler   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example44 Project: pipenv   Author:pypa   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example45 Project: OpenTrader   Author:OpenTrading   File: tabview.py   License: GNULesserGeneralPublicLicensev3.0 5 votes defdetect_encoding(data=None): """Returnthedefaultsystemencoding.Ifdataispassed,try todecodethedatawiththedefaultsystemencodingorfromashort listofencodingtypestotest. Args: data-listoflists Returns: enc-systemencoding """ enc_list=['utf-8','latin-1','iso8859-1','iso8859-2', 'utf-16','cp720'] code=locale.getpreferredencoding(False) ifdataisNone: returncode ifcode.lower()notinenc_list: enc_list.insert(0,code.lower()) forcinenc_list: try: forlineindata: line.decode(c) except(UnicodeDecodeError,UnicodeError,AttributeError): continue returnc print("Encodingnotdetected.Pleasepassencodingvaluemanually") Example46 Project: python-netsurv   Author:sofia-netsurv   File: autopep8.py   License: MITLicense 5 votes defdetect_encoding(filename,limit_byte_check=-1): """Returnfileencoding.""" try: withopen(filename,'rb')asinput_file: fromlib2to3.pgen2importtokenizeaslib2to3_tokenize encoding=lib2to3_tokenize.detect_encoding(input_file.readline)[0] withopen_with_encoding(filename,encoding=encoding)astest_file: test_file.read(limit_byte_check) returnencoding except(LookupError,SyntaxError,UnicodeDecodeError): return'latin-1' Example47 Project: jbox   Author:jpush   File: inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example48 Project: ru   Author:seppius-xbmc-repo   File: inputstream.py   License: GNUGeneralPublicLicensev2.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example49 Project: PyDev.Debugger   Author:fabioz   File: autopep8.py   License: EclipsePublicLicense1.0 5 votes defdetect_encoding(filename,limit_byte_check=-1): """Returnfileencoding.""" try: withopen(filename,'rb')asinput_file: fromlib2to3.pgen2importtokenizeaslib2to3_tokenize encoding=lib2to3_tokenize.detect_encoding(input_file.readline)[0] withopen_with_encoding(filename,encoding)astest_file: test_file.read(limit_byte_check) returnencoding except(LookupError,SyntaxError,UnicodeDecodeError): return'latin-1' Example50 Project: rssant   Author:anyant   File: response_builder.py   License: BSD3-Clause"New"or"Revised"License 5 votes defdetect_content_encoding(content:bytes,http_encoding:str=None): """ >>>detect_content_encoding(b'hello','text/xml;charset=utf-8') 'utf-8' >>>detect_content_encoding(b'hello','text/xml;charset=unknown') 'utf-8' >>>content=''.encode('utf-8') >>>detect_content_encoding(content) 'utf-8' >>>detect_content_encoding("你好".encode('utf-8')) 'utf-8' """ content=bytes(content[:2000])#onlyneedpeekpartialcontent checker=EncodingChecker(content) ifhttp_encoding: encoding=checker.check(http_encoding) ifencodingisnotNone: returnencoding encoding=checker.check(_detect_json_encoding(content)) ifencodingisnotNone: returnencoding encoding=checker.check(_detect_xml_encoding(content)) ifencodingisnotNone: returnencoding encoding=checker.check(_detect_chardet_encoding(content)) ifencodingisnotNone: returnencoding return'utf-8' Example51 Project: PhonePi_SampleServer   Author:priyankark   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example52 Project: twitter-stock-recommendation   Author:alvarobartt   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example53 Project: nzb-subliminal   Author:caronc   File: inputstream.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectEncoding(self,parseMeta=True,chardet=True): #FirstlookforaBOM #ThiswillalsoreadpasttheBOMifpresent encoding=self.detectBOM() confidence="certain" #IfthereisnoBOMneedtolookformetaelementswithencoding #information ifencodingisNoneandparseMeta: encoding=self.detectEncodingMeta() confidence="tentative" #Guesswithchardet,ifavaliable ifencodingisNoneandchardet: confidence="tentative" try: fromchardet.universaldetectorimportUniversalDetector buffers=[] detector=UniversalDetector() whilenotdetector.done: buffer=self.rawStream.read(self.numBytesChardet) ifnotbuffer: break buffers.append(buffer) detector.feed(buffer) detector.close() encoding=detector.result['encoding'] self.rawStream.seek(0) exceptImportError: pass #Ifallelsefailsusethedefaultencoding ifencodingisNone: confidence="tentative" encoding=self.defaultEncoding #Substituteforequivalentencodings: encodingSub={"iso-8859-1":"windows-1252"} ifencoding.lower()inencodingSub: encoding=encodingSub[encoding.lower()] returnencoding,confidence Example54 Project: Python24   Author:HaoZhang95   File: _inputstream.py   License: MITLicense 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example55 Project: Computable   Author:ktraunmueller   File: format.py   License: MITLicense 5 votes defdetect_console_encoding(): """ Trytofindthemostcapableencodingsupportedbytheconsole. slighlymodifiedfromthewayIPythonhandlesthesameissue. """ importlocale global_initial_defencoding encoding=None try: encoding=sys.stdout.encodingorsys.stdin.encoding exceptAttributeError: pass #tryagainforsomethingbetter ifnotencodingor'ascii'inencoding.lower(): try: encoding=locale.getpreferredencoding() exceptException: pass #whenallelsefails.thiswillusuallybe"ascii" ifnotencodingor'ascii'inencoding.lower(): encoding=sys.getdefaultencoding() #GH3360,savethereporteddefencodingatimporttime #MPLbackendsmaychangeit.Makeavailablefordebugging. ifnot_initial_defencoding: _initial_defencoding=sys.getdefaultencoding() returnencoding Example56 Project: fxxkpython   Author:wistbean   File: _inputstream.py   License: GNUGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingisnotNoneandencoding.namein("utf-16be","utf-16le"): encoding=lookupEncoding("utf-8") returnencoding Example57 Project: vnpy_crypto   Author:birforce   File: console.py   License: MITLicense 5 votes defdetect_console_encoding(): """ Trytofindthemostcapableencodingsupportedbytheconsole. slightlymodifiedfromthewayIPythonhandlesthesameissue. """ global_initial_defencoding encoding=None try: encoding=sys.stdout.encodingorsys.stdin.encoding exceptAttributeError: pass #tryagainforsomethingbetter ifnotencodingor'ascii'inencoding.lower(): try: encoding=locale.getpreferredencoding() exceptException: pass #whenallelsefails.thiswillusuallybe"ascii" ifnotencodingor'ascii'inencoding.lower(): encoding=sys.getdefaultencoding() #GH3360,savethereporteddefencodingatimporttime #MPLbackendsmaychangeit.Makeavailablefordebugging. ifnot_initial_defencoding: _initial_defencoding=sys.getdefaultencoding() returnencoding Example58 Project: Offensive-Security-Certified-Professional   Author:StevenDias33   File: padding-oracle-tests.py   License: MITLicense 5 votes defdetect_encoding(self): b64url='^[a-zA-Z0-9_\-]+={0,2}$' b64std='^[a-zA-Z0-9\+\/]+={0,2}$' hexenc1='^[0-9a-f]+$' hexenc2='^[0-9A-F]+$' data=self.data ifre.search('%[0-9a-f]{2}',self.data,re.I)!=None: dbg('Sampleisurl-encoded.') data=urllib.unquote_plus(data) self.urlencoded=True if(re.match(hexenc1,data)orre.match(hexenc2,data))andlen(data)%2==0: dbg('Hexencodingdetected.') returnself.HEXENC ifre.match(b64url,data): dbg('Base64urlencodingdetected.') returnself.B64URL ifre.match(b64std,data): dbg('StandardBase64encodingdetected.') returnself.B64STD error('Warning:Couldnotdetectdataencoding.Goingwithplaindata.') returnself.NONE Example59 Project: MARA_Framework   Author:xtiankisutsa   File: inputstream.py   License: GNULesserGeneralPublicLicensev3.0 5 votes defdetectEncodingMeta(self): """Reporttheencodingdeclaredbythemetaelement """ buffer=self.rawStream.read(self.numBytesMeta) assertisinstance(buffer,bytes) parser=EncodingParser(buffer) self.rawStream.seek(0) encoding=parser.getEncoding() ifencodingin("utf-16","utf-16-be","utf-16-le"): encoding="utf-8" returnencoding Example60 Project: filmkodi   Author:mrknow   File: autopep8.py   License: ApacheLicense2.0 5 votes defdetect_encoding(filename): """Returnfileencoding.""" try: withopen(filename,'rb')asinput_file: check_lib2to3() fromlib2to3.pgen2importtokenizeaslib2to3_tokenize encoding=lib2to3_tokenize.detect_encoding(input_file.readline)[0] #Checkforcorrectnessofencoding withopen_with_encoding(filename,encoding)astest_file: test_file.read() returnencoding except(LookupError,SyntaxError,UnicodeDecodeError): return'latin-1' reportthisad



請為這篇文章評分?