ÿØÿà JFIF    ÿÛ „ !.%+&8&+/1555$;@;4?.451 4,$,44444444444414444444444444444444444444444444444444ÿÀ  á á" ÿÄ     ÿÄ ?    !1AQaq"2‘¡±ÁðBRbrÑá#‚’¢²3S CñÿÄ   ÿÄ !    !1QAa‘2ÿÚ   ? 5˜Z¯V¦cø)›t/? z¨±>Õ5€¶‹Á¤·¼z¼Ü¬+ñ®v¤¨_ˆR­BFn©—˜ý®ç̝P8gýt·ÉSTŦˆìät?þé¼íìN/Þa)ì–í6ô… Ï¿øÃj´¿KÇü]ÿ ªô¹-eKànëÕHTx}ýSÜ›ÿ ”7Ø×&µ<¦  ¥ÑO¶[Ù¯ä¨ÞÃÿ PZ-¬;#õ|•oaÿ ©CìÞz3˜öː/¤­ñTûIØ}š^ mÓ%ªxˆ¥ÉŸu=Z+ISe¿45™¼u;ú&WØ÷€æßQ™®{|íx*TC“#ZŠìZ§²‹ 6pv…³¿¡äª*áZÐ%ÒOáˆo"x«OHk w±æ+¬V(kMúŸ5Vö«$ ÁrÏbàb57/luR ¸ÑÛj Òµì`Мq­û žICÀÊ•©4€Âcà¨Ï€O´<èÐ:›ù(Ë^L8þ‘ÍÌ#¸Ð_Ì©ÙK(Öz 4¬û+¸;ü’V’84‘¬ÃŽ:[â‡ÔÌáõp¢~§ªlæ£ö{®G>J¼"°‡7¯ÆÉèßû ‹É‹§ÁòÃýâßî ^ƾÙõ‹×óH#«LP½ïX=xÑÍ$|W?•~• îëÔ©ª‹ {ÝT…Kÿ ”hûâá)J*ö˜–ÔU;iÇ€/ ÆþjóZ\ýwØ=Ìm ºèËL9 ýèÆð/¨’¥öo=nË.%Îì ŽÕ¯È|{Oj²ƒE6e/ßdÄõ²Ìâ1O®ò×TsəԸhOMýíMˆ¿¼H˜l²,7Â¥#MF/Úf°Ö½± ¸–dr‹NýÊ íjqx{œÉ ä-È ¦ øÄër¨q°ð †nцýÑÄÆ’mä…n<0È™;ÁÝá¯ÁZƒ7FÀmì­ É&9ˆîéi¶ùN§Y• ÃZãAâ?•‡©‰ , ó¾IŸŠc1 4â&y­&pŠ­6;M À 0¹qç»p.á …ŸÅáK@%6·y6ƒ‰3?”úºŽ‰éX5ªPT §µ!=Mž«Ú½‹ÅgÂSâÉaþÓoö–¯ÁÔìR>5éÿ üs¶ÆUcÌ kÇR ]ÿ ù¬¼«VŽ;Â|‡~¢¦”ÏŰæ {L™Õ°Óv¹ò¸írޡעCÃ!íVÕ {¶»sŒNPg/ "uÕbkm²“$ďå¿é¹§°½æz¯6 †s¿!s–wÚÝ“™Œ °.ûj>·+™Òa…©Œ&rÝÎtÛë긪Ît’LAVp%c Úý[ÄzJ¾ÇàXXç@˜ó<êL]·T˜¾¥1Ó©V‡g´æ½¦Ý@¹óø!_@´ÞâSÁ —S3™•& ]@JHÚý©ZŽ €×æÔr»Áf!‡yÞ4Mv*èÓã_{‘åóUuљØ«Oïé*®EvÑ Œ÷‡U \"㪒ÍK+À 4“M¡ï:0¥5í!'<@î´”>Ç»&Z–ïCCV˜Ì5Šo&îhè.žû |ÓK©h$s6KìŒëã)¹hI¦GïOåóI;ììü#É$Š0…Ææ¥TØ.5­¾gn´ “ÂÖ\:hœ89G)J@„}œ:’Ò{/Š"¦_Æ×7Æ3VÇŠÊa]ÚŒÙ€Ä–=®uÁßâACZƒ§§£ Qnâ:«,×{tyø¬iÛcœÜÄ€H½ÄÍCk´÷šß .W'b¤Íåh]÷€=,Žv×cÚEÚHXJX¶îo¨FÒtèöŸ>ªª6[J®Fµ£sGÁeqõfe\íjÒÐïÄÐGˆe1Ø‹.Ø”‘Ëuø Y­ˆÜ ŽG|zùªüMpDnQWÄ”%JŠ™)â*p@Örš«ÕT2Ð%ˆG#ª„ ·¤!°ŸOTÂT¸aÚ%4&h™LµšØüÐ.F¿²ÐÞ_Ç‚¾ÅÃaÜ÷09Æ q€öy˜v‡85õN÷]¬äѼóS{°_MެúÔ#°Ç¸0åÞè2ëôPcvÆw9®ií1Ä8F™˜à‰´+‰Ik1òÝ7“Ñ×ÒsÝ\x‚h`ÞÑ`ó"|µEcý£n˜h`}GÞ !±ù²Ápü²ß6 0ïi󜵩SÈÇ7˜-ÕURO˜¦´f$ªž-Í6(œ}<„ éc øs]ŽŽ„*—¾ ìdŽ„)méª\¿êÎIg¾ØÞ~I#C/¼¼´EÁÈŽi8“©õådô·>euä ƒ'Ê×लR1ÉJE1ÐAát`t;ÇР%Ý<‡¥„ÍÆ`×Oyó)õiI€ñQaŸ4Ûù\áàaÃÔ¹HÃu¹*k€¦<„e S‡&õÏ B!ŽhüÞ`yj}mªf×\¿ Ç~æ­9‡û\՞Ǖg²1Žû5V7 !àöšm° c`ܬøÇìµÒ'P"?…´Ö,"§^•õލsÔ)6˜sæéÍR¼ ò|Sl”‹7 nPW Gòú÷½§O¯‡„l¡kSÞŒr½PÊ@æ¢pŽ-mÿ #Ÿ˜Àº¶Áä¦;ïÔæ$1££`“Õ>„—·ž)ßð³ñ#Ï Ô$¶œ‰ÊE‹À;÷º 
¯«P:Ñ”8–IÊtpÞ3ª“>ê“þës4ò2OÏÕ­±zô†Õ§‰.÷ä¸;¿˜“'œ›žª}«Œ{ª±Ì 9ÔóÞÕ‡0 $íWV3Üì¬ —@kÝ4@¿r¼±½¬™›?øØæ´'Áé®CË3-g$˜ö‡×auÚi´Žp/êÛ æF›Ú2v‹ã¿¿,nB1̨ƃqÞa5͝@&Æû“él÷ \C²½UÍc ¯k×¢U ÖéQå™—-r wô ÞÏ<Ò=&=ÿ Ôê Òêˈt,i—;LîÜ á¸*ÚÃ1$êL•LÍ <É)ýÐà’ ;F™{ƒ™˜€&'}‚ãÄK`¡ÞT@I;®žZóè‚s’7®°›+§O­Åq©é»²9<Ô J ¼9O’HL»Ùïì¸rk¼Ž_ý‘TŸu[²ßÚŒ·ü÷B%¯E ŸÔX5êO´ Ç•€’I0 ÉJX` ñ¹õ%;µŸD‘«´€àwÒ™U ûئžÖö\×®×´8 ½‡ºÐÆÓ§?Àkmœ=;d5*@-ì0F Rªýš[Ü6âö̃ڸr*KA9· u*µæ£?U¸Âêí†8@¦X4 e-ò„0s{ HâUpU?¼mñRa°®a%Ð'tÉ×’\¾ÊÉ]t›h>·(Ë@R¼¡Ãt h}’O÷au<+nT…Ö…MӐ??Óe95 q>í/;&JSû °¯ÊéÞ øƒ*Ã2½Ài&:nôUl=¾¿5eˆ3”ñc|Ú2V”>„»&eE;«ÚäC p¢Û úy 9š[ŒÌx¼擼A&DåÒ¯ˆ¤ÀÌ;"˜ ÏQä¸åhÊ}Ûq«Û0WžÒ|»€ø®öCm5•\ÇÀ§Pe3£]0ÃàLDÉ‰1øªxjgwT‚÷¿LΨK‹›ùs—xˆÜ±µ kæ¸f‰‰ÜGk/LÛØ6d9ò¶ùA{ƒA3š/¬D¬khÓk‰`˜"㯒r¿±Óã jx‡°e}<Ñø\3y:'À•/h½Í€Ç4~g ?Û(¼]v‘ªlKÎâ~?O‚W%{Ì:“'©úNq¾›úo(X’¥¯ˆ nFê{Ç€ü?º'ë ø‹ì Þ09ŒÌç9Æ —ËC`j@ÓÄ(+a‹un¸#ÂꟋ{K`‘ÑÍÍ'à´»/Û,KW;Þ4²þð ï Nm|~fGÏ(…³Ã)«1ö­Õ ¥‡¨©ƒÃ™ü-s=à=U66Ï«Ýc蓦W¹íž®›nÔ%êÇìŒ<#Ü×84ån®Ð ÒåOC` ñânÑs‡¢ç 1õ%Îhì½Ã½® e:ݼUZo™`  ÅZŸŒÊ«ê1ÏÄo$q¹Þ€©ˆhÐÉä¯ñ[!…Ú˜àJ:x2$Íß&PåT£6ç— ‡Í*4Ýšçjÿ ‰É nófÐ ó(L5C•åÆ\rMÒ@ò }y-W}™üýVù—ú¢=Ù”c®‘< M ž ´Phr ¦©TD ‘ù.$´÷O‡‘V2Æò.=IUŒ=ž‡â¬i™aþÓåÙ?òUø'ØÖ•.~* šTŒ!•-×áºTâ®ä#õü'´ eýlYÅÓeÕKÂrT"CÚ@u!Óxƒ{š3€}1¿(r}%«nËamjÑ%ÑNEò v ˜à  σöK³,*º.àzù¨™Ó ÚçâU¦*¿ 9{%Ö¹ njûdaXöb) kÛÆ±ûÓ\°M7ˆÂ=û›ç¿Ã‚­V»Cg–8ÙêE- j)k$º`Ã-ùEýeBÆÇ]c¡°ñty&Òd0nõ'¡W+ƒ*|–øµFa\GQªEAÔp5\Ǽ·¼Ç8·õ -â§Ú[ ‡ uZeÖ 3}×d'+¹:ð+K†Û®s!Ï$úe€<Û”x)1»a­¡LC]¸µík…ÚàA»AYº{†ªS[¦5HÒ7ù --,ísòDØ€èk ÞÀîÜ ò@â( ËNˆë›4ô½•/¦o‡€Û7 ê•ÆêòðÜy'Án½µ á˜ݦ ndeo…[ì¶Ê,¥R³Ä=À±—–ß;£™´ñSâ*g§”ïaið‘Jå~™ÓÞ ß³Õ¢»8x埒²52>AÊb&-÷\7´éÄù€T˜,w;3{ï˜k…à¹ÄqÀ«œ{€\ ˆ¾[´¨јr &Úé„Ívˆ±8†¿]|¬ņ4I×pÞS1ÈÖz‰#Ìv‡G!YNògñ:màTz¢Ý1ô©^O=~ë|5Bã™ç•¼µõ•bÆ@úÕS¬ÈŒ#¬zünrŸ û” Z²•èðV"ÁHÚý©wÝ €7¼Ìu1hÑa3Éä û f$o¿É ™Ú›ÝçnpÒ3äÌ3†Í§,Äï]$‰/pê †«À¼¸e9­Æê_C]žƒ·ý·frÁN«, E=›Çq -‰öŒ:aÏ¿±í&£Í:-} 84‘ÿ eƒQÑeëSsuiA ³g㟥ú£?ÿ ʼn*”“÷aühe:ÊWa@ÒÞk±eØ] F Ô—r.åä˜ @ö¥ªZoÐýYL·¥S²G/‡ñ <~*ZÆ´è>JlòàÛÆ½ÿ 窘ìGN¢:I®KšJp/`íIÁÀõ#Ä-€ö­šµŒoF4|ÆQØÆ@Ì|£Ô…¢À{9˜è½Üó›€ôYÒÎYsið;ís¤€à²ˆ‚4qÉVŒI$ ‰"° æµ8cXGjœˏ¡Aâý•ËÜ¢ûï e·çLx']á"oÅÎê3¯Ç—¹”ó0nå‚âg{Œñ> S´˜îè°g238‚ãköÝfÚd´6Ò€;ò÷±¢™¼›º 
¢Æ'¥Ðx'e¬ç ]bÈÆV¢ó‹kýBO ðÊâ$Ÿ!×T 3Mýמ žìٍàÌü‘8÷€àæØ8æ©6‰©L´«…oãpð„~Çk‰!ñ;‹”ÛžÍ àž±z Ÿôû øŸÝužÏ;ÿ #|u6™Þ¬ÚˆÐõA4¶â|ôl|Ê2ŽÇ¤ÝÅÇY.<#Aí.k§hóF‚”Y; M½Ö4hŸ4&›­¿tès´%FìL¥£Ãk‰ÇT¤haÁ¤ÚxfÉ`ÑìË›>i 3t‚:,–+^÷´–{Û–Nxi"x‘Ûg î¨>¥Õ܁ùZH,2Û“:8xÊ¢Çí9.É-Ìâã-=çjwµS˜dütžçwýGòú®®ûº_ˆýx$–¡ãøO EÚÛÏ÷R„×w+3£Á£öUMyR²¹âŒ°š›¸Ñãò9§Ó_Dl+Ùßc›úšGÅÌc†Ž!Ko=¶.‘Îÿ c²(2®V mª.ÿ ¹B›¹å ù„öŸSV>™ü¯$y:G¢Z×àøúdî¹û­·ýÇ´:•c LÍõi_‹ö+ÎæGÊè>OŠ•äž´§Þ{X}¨1ÚTc›»Qþ•êô°t¿OP?eæ~É{5]•ÙR£r5†nZ\ã@ &îJõ ¾àC°þV>fé¥/ü5ñÊIº_é5 ;e­h<@ Ä&æÃëE%;X,ÒãÆÞ`Oò¦kŸm#˜!ÀyÄ¢| óLšò¥Ä` ¶R=|ÈCâh5ò3DˆïF†ðÒ#ÅìÛœ?¸yhBãœí ZxßÎÄhºRK„`Þödvײ™ÀÈÑÒgŒuY w³%†ƒÓzõ ÖÏp‚dH®¦A´ù§»ÓÇMæ~)ˆð‡û:ù&Ä •vGD´À n ݇¼Ö8Fö óáà£~Ë¥x`oK|Ä?fxiØü%pìR>éò+Û±éÎ>núlFŤ'tq8LZÏvÃ?„¡ß±È⽆¯³íü@x|PöUäèØã¡ð‚ŒAìÏ"vÍwóŸÍ{ ý0.z È•Ö{,N¡£¡ŸKÕÙž>Ýœþ ÍÀ°<×EA!Å‚D™IúOÍ¡>ôG}Â` ÍßkÜL™Ž Þð™ {IøF²¹òQ3&!ÃÂÞz.d&Ï-sH¸,Ôõ˜ŽP€ 77ˆÝ¼ÊëÜw =cÕ Ú,ØÐ5ÎYÐ)ì´öœgŒ[¤ßv㙑8心>h]§µháYš£²ºÑ.{Ï7Sð•?´~×SÃKýJÛ˜ ™Íäiúu<µX¶1õ^kâçIÑ£sZ4h>j*ÔšD:4­¿_ ÷¸ Õxæÿ ¸?Mù _•­ÊÐ ä ÷ý ÑwL œ­ïnTkÛUÍN©ë:¦fV ¶ÜÔÜMªÅâA½–¿R×TXš-%iTÊT•‡Ù‚JôϐZxWÑè‰f‰òG º ×Õû2aZ7OU3[“×AT–ÞŒ…-‘¤”Ì ì&(ˆ¿­•ƒkï’:ðY¦W‘ Å)“†‘˜³Åtcø˜ñTÂwÚÇ4|üLÇªí–v- qˆèU qPE.†â‘˜µ Æ,ÐÅs]8¾„oúÑ i>ÜxxÈó)ƒ ´æÁâØ$À‰vžŸf$Ž |ãw;ÀÁIJ»b` {¦Ó¤Ú$©YÀ‘n@Óïž«9J¼êG m¤ ܯ¹ÌW4€ÐÒÅÛ‡#褕Ÿn-?í|с¥÷Ú¹¬'´ÞÜ9ÓK `hê£SÄSà?7—Wí_´…óB›»:=Ãïq`<8ñÓŒÑlú2d¬ê³£hÖ[l|$vÝro~'R®‰§°ñmY ͧäP |PUª¹·:3Œ[Û{Xÿ ºâ@‚W–Äé u‚ ¯´*=íή.pûÒdt @G‰¬ s¸ ëÉücr ÞæÑ¨Ê@>¤¢Ö±. Þ'¯°ÌME[YéïĵÂCå½ Ué©Áû'Ê9%eÔðNU”ë‘ÌsD3/®+UI˜9h.WC”빓$#:pz:YÓ ¿xž* ³$Í +$kñAŠ‹†¢ Uê>¸)_š¬÷©ßAÂÔb9ÇU ¯¾á•9¯ÏÏ÷O÷¼¼Fähal1‰3Ì[Ïr•´UCksNÐ] R‘¸¥H+§Šé†c©vÖÞ0iÓ76s†î!§=ß ¼~Ô'°Ãmäoäš³ªøi1úÉ)³yV8 CLÄØÁ‘WYïi€H6ÖÑiámø^ÈY´°Ñ7¥Û*—Ñ©L«Qƒï—Ùrÿ ›£Ð*š¸ˆL©ˆ$ˆ ÷¾D§9È®«qbqC)–ˆïv´çñsÑVT­Ø, <àïºÀO«Jý·õ àfPìð .wFšir´þ’2_Y *Æ€x\« ì€9š@ Ž|F⇥ˆkZ@hÖÄ0t¿-<“‹qµ¾*ZL¤Ú)&BJpÓF5=$„at*Zš$’ÑtdûÝRI1 2މ$€$I$#‰SÞ’Hë¬ï;Á$¡t$’`<(ñÇt)$‡Ð.Êf¢X’Kt=Éé$‚ˆªè¢oÝëòI%Rgcª÷ŠyI%¡‰ÿ !ñ)´õ $¤ Ô’IIGÿÙ--- -- Library methods for handling unicode strings. 
--
-- @author Daniel Miller
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html

local string = require "string"
local table = require "table"
local stdnse = require "stdnse"
local unittest = require "unittest"
local tableaux = require "tableaux"
_ENV = stdnse.module("unicode", stdnse.seeall)

-- Localize a few functions for a tiny speed boost, since these will be looped
-- over every char of a string
local byte = string.byte
local char = string.char
local pack = string.pack
local unpack = string.unpack
local concat = table.concat

---Decode a buffer containing Unicode data.
--
-- If the decoder reports an error (returns nil as its position), decoding
-- stops and the decoder's error value is passed back to the caller.
--@param buf The string/buffer to be decoded
--@param decoder A Unicode decoder function (such as utf8_dec)
--@param bigendian For encodings that care about byte-order (such as UTF-16),
--                 set this to true to force big-endian byte order. Default:
--                 false (little-endian)
--@return A list-table containing the code points as numbers, or nil if the
--        decoder reported an error
--@return The decoder's error value, only when the first return value is nil
function decode(buf, decoder, bigendian)
  local cps = {}
  local pos = 1
  local len = #buf
  while pos <= len do
    local nextpos, cp = decoder(buf, pos, bigendian)
    if not nextpos then
      -- On failure the second return value of the decoder is its error.
      return nil, cp
    end
    cps[#cps+1] = cp
    pos = nextpos
  end
  return cps
end

---Encode a list of Unicode code points
--
-- Code points for which the encoder returns nil are skipped, which is the
-- same thing transcode does with them.
--@param list A list-table of code points as numbers
--@param encoder A Unicode encoder function (such as utf8_enc)
--@param bigendian For encodings that care about byte-order (such as UTF-16),
--                 set this to true to force big-endian byte order. Default:
--                 false (little-endian)
--@return An encoded string
function encode(list, encoder, bigendian)
  local buf = {}
  for _, cp in ipairs(list) do
    -- Append rather than store at the list index: assigning nil at #buf+1 is
    -- a no-op, so a failed encoding cannot leave a hole for concat to trip on.
    buf[#buf+1] = encoder(cp, bigendian)
  end
  return concat(buf, "")
end

---Transcode a string from one format to another
--
--The string will be decoded and re-encoded in one pass. This saves some
--overhead vs simply passing the output of unicode.encode to
--unicode.decode.
--
--If the decoder reports an error (returns nil as its position), transcoding
--stops and the decoder's error value is passed back to the caller. Code points
--for which the encoder returns nil are skipped.
--@param buf The string/buffer to be transcoded
--@param decoder A Unicode decoder function (such as utf16_dec)
--@param encoder A Unicode encoder function (such as utf8_enc)
--@param bigendian_dec Set this to true to force big-endian decoding.
--@param bigendian_enc Set this to true to force big-endian encoding.
--@return An encoded string, or nil if the decoder reported an error
--@return The decoder's error value, only when the first return value is nil
function transcode(buf, decoder, encoder, bigendian_dec, bigendian_enc)
  local out = {}
  local pos = 1
  local len = #buf
  while pos <= len do
    local nextpos, cp = decoder(buf, pos, bigendian_dec)
    if not nextpos then
      return nil, cp
    end
    out[#out+1] = encoder(cp, bigendian_enc)
    pos = nextpos
  end
  return concat(out)
end

--- Determine (poorly) the character encoding of a string
--
-- First, the string is checked for a Byte-order Mark (BOM). This can be
-- examined to determine UTF-16 with endianness or UTF-8. If no BOM is found,
-- the string is examined.
--
-- If null bytes are encountered, UTF-16 is assumed. Endianness is determined
-- by byte position, assuming the null is the high-order byte. Otherwise, if
-- byte values over 127 are found, UTF-8 decoding is attempted. If this fails,
-- the result is 'other', otherwise it is 'utf-8'. If no high bytes are found,
-- the result is 'ascii'.
--
--@param buf The string/buffer to be identified
--@param len The number of bytes to inspect in order to identify the string.
--           Default: 100
--@return A string describing the encoding: 'ascii', 'utf-8', 'utf-16be',
--        'utf-16le', or 'other' meaning some unidentified 8-bit encoding
function chardet(buf, len)
  local limit = len or 100
  if limit > #buf then
    limit = #buf
  end
  -- Check BOM
  if limit >= 2 then
    local bom1, bom2 = byte(buf, 1, 2)
    if bom1 == 0xff and bom2 == 0xfe then
      return 'utf-16le'
    elseif bom1 == 0xfe and bom2 == 0xff then
      return 'utf-16be'
    elseif limit >= 3 then
      local bom3 = byte(buf, 3)
      if bom1 == 0xef and bom2 == 0xbb and bom3 == 0xbf then
        return 'utf-8'
      end
    end
  end
  -- Try bytes
  local pos = 1
  local high = false
  local utf8 = true
  -- "<=" so that the byte at index `limit` is inspected as well.
  while pos <= limit do
    local c = byte(buf, pos)
    if c == 0 then
      -- A null at an even (1-based) index is the second byte of a 2-byte
      -- unit, i.e. the high-order byte comes last: little-endian.
      if pos % 2 == 0 then
        return 'utf-16le'
      else
        return 'utf-16be'
      end
    elseif c > 127 then
      high = true
      if utf8 then
        local nextpos = utf8_dec(buf, pos)
        if nextpos then
          -- Valid sequence: jump over all of its bytes.
          pos = nextpos
        else
          -- One bad sequence rules out UTF-8 for the rest of the scan.
          utf8 = false
        end
      end
      if not utf8 then
        pos = pos + 1
      end
    else
      pos = pos + 1
    end
  end
  if high then
    if utf8 then
      return 'utf-8'
    else
      return 'other'
    end
  else
    return 'ascii'
  end
end

---Encode a Unicode code point to UTF-16. See RFC 2781.
--
-- Windows OS prior to Windows 2000 only supports UCS-2, so beware using this
-- function to encode code points above 0xFFFF.
--@param cp The Unicode code point as a number
--@param bigendian Set this to true to encode big-endian UTF-16. Default is
--                 false (little-endian)
--@return A string containing the code point in UTF-16 encoding, or nil if cp
--        is negative, not an integer, or greater than 0x10FFFF.
function utf16_enc(cp, bigendian)
  -- One unsigned 16-bit code unit in the requested byte order.
  local fmt = "<I2"
  if bigendian then
    fmt = ">I2"
  end

  if cp % 1.0 ~= 0.0 or cp < 0 then
    -- Only defined for nonnegative integers.
    return nil
  elseif cp <= 0xFFFF then
    return pack(fmt, cp)
  elseif cp <= 0x10FFFF then
    -- Surrogate pair: the top 10 bits go in the lead unit, the low 10 bits in
    -- the trail unit.
    cp = cp - 0x10000
    return pack(fmt .. fmt, 0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF))
  else
    return nil
  end
end

---Decodes a UTF-16 character.
--
-- Does not check that the returned code point is a real character.
-- Specifically, it can be fooled by out-of-order lead- and trail-surrogate
-- characters.
--@param buf A string containing the character
--@param pos The index in the string where the character begins
--@param bigendian Set this to true to decode big-endian UTF-16. Default is
--                 false (little-endian)
--@return pos The index in the string where the character ended
--@return cp The code point of the character as a number
function utf16_dec(buf, pos, bigendian)
  -- One unsigned 16-bit code unit in the requested byte order.
  local fmt = "<I2"
  if bigendian then
    fmt = ">I2"
  end

  local cp
  cp, pos = unpack(fmt, buf, pos)
  if cp >= 0xD800 and cp <= 0xDFFF then
    -- Surrogate: combine with the following code unit.
    local high = (cp - 0xD800) << 10
    cp, pos = unpack(fmt, buf, pos)
    cp = 0x10000 + high + cp - 0xDC00
  end
  return pos, cp
end

---Encode a Unicode code point to UTF-8. See RFC 3629.
--
-- Does not check that cp is a real character; that is, doesn't exclude the
-- surrogate range U+D800 - U+DFFF and a handful of others.
--@param cp The Unicode code point as a number
--@return A string containing the code point in UTF-8 encoding, or nil if cp
--        is negative, not an integer, or greater than 0x10FFFF.
function utf8_enc(cp)
  local bytes = {}
  local n, mask

  if cp % 1.0 ~= 0.0 or cp < 0 then
    -- Only defined for nonnegative integers.
    return nil
  elseif cp <= 0x7F then
    -- Special case of one-byte encoding.
    return char(cp)
  elseif cp <= 0x7FF then
    n = 2
    mask = 0xC0
  elseif cp <= 0xFFFF then
    n = 3
    mask = 0xE0
  elseif cp <= 0x10FFFF then
    n = 4
    mask = 0xF0
  else
    return nil
  end

  -- Fill the continuation bytes from last to first, 6 bits at a time.
  while n > 1 do
    bytes[n] = char(0x80 + (cp & 0x3F))
    cp = cp >> 6
    n = n - 1
  end
  -- Whatever bits remain go into the lead byte under its length marker.
  bytes[1] = char(mask + cp)

  return concat(bytes)
end

---Decodes a UTF-8 character.
--
-- Does not check that the returned code point is a real character.
--@param buf A string containing the character
--@param pos The index in the string where the character begins
--@return pos The index in the string where the character ended or nil on error
--@return cp The code point of the character as a number, or an error string
function utf8_dec(buf, pos)
  pos = pos or 1
  local n, mask
  local bv = byte(buf, pos)
  if bv == nil then
    -- pos is outside the buffer (this includes an empty buffer).
    return nil, string.format("No byte at %d", pos)
  elseif bv <= 0x7F then
    return pos+1, bv
  elseif bv <= 0xBF then
    -- 10xxxxxx is a continuation byte and cannot start a sequence.
    return nil, string.format("Invalid UTF-8 byte at %d", pos)
  elseif bv <= 0xDF then
    --110xxxxx 10xxxxxx
    n = 1
    mask = 0xC0
  elseif bv <= 0xEF then
    --1110xxxx 10xxxxxx 10xxxxxx
    n = 2
    mask = 0xE0
  elseif bv <= 0xF7 then
    --11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    n = 3
    mask = 0xF0
  else
    return nil, string.format("Invalid UTF-8 byte at %d", pos)
  end

  local cp = bv - mask

  if pos + n > #buf then
    return nil, string.format("Incomplete UTF-8 sequence at %d", pos)
  end
  for i = 1, n do
    bv = byte(buf, pos + i)
    if bv < 0x80 or bv > 0xBF then
      return nil, string.format("Invalid UTF-8 sequence at %d", pos + i)
    end
    cp = (cp << 6) + (bv & 0x3F)
  end

  return pos + 1 + n, cp
end

-- Code Page 437, native US-English Windows OEM code page
-- Maps byte values 0x80-0xFF to Unicode code points.
local cp437_decode = {
  [0x80] = 0x00c7, [0x81] = 0x00fc, [0x82] = 0x00e9, [0x83] = 0x00e2,
  [0x84] = 0x00e4, [0x85] = 0x00e0, [0x86] = 0x00e5, [0x87] = 0x00e7,
  [0x88] = 0x00ea, [0x89] = 0x00eb, [0x8a] = 0x00e8, [0x8b] = 0x00ef,
  [0x8c] = 0x00ee, [0x8d] = 0x00ec, [0x8e] = 0x00c4, [0x8f] = 0x00c5,
  [0x90] = 0x00c9, [0x91] = 0x00e6, [0x92] = 0x00c6, [0x93] = 0x00f4,
  [0x94] = 0x00f6, [0x95] = 0x00f2, [0x96] = 0x00fb, [0x97] = 0x00f9,
  [0x98] = 0x00ff, [0x99] = 0x00d6, [0x9a] = 0x00dc, [0x9b] = 0x00a2,
  [0x9c] = 0x00a3, [0x9d] = 0x00a5, [0x9e] = 0x20a7, [0x9f] = 0x0192,
  [0xa0] = 0x00e1, [0xa1] = 0x00ed, [0xa2] = 0x00f3, [0xa3] = 0x00fa,
  [0xa4] = 0x00f1, [0xa5] = 0x00d1, [0xa6] = 0x00aa, [0xa7] = 0x00ba,
  [0xa8] = 0x00bf, [0xa9] = 0x2310, [0xaa] = 0x00ac, [0xab] = 0x00bd,
  [0xac] = 0x00bc, [0xad] = 0x00a1, [0xae] = 0x00ab, [0xaf] = 0x00bb,
  [0xb0] = 0x2591, [0xb1] = 0x2592, [0xb2] = 0x2593, [0xb3] = 0x2502,
  [0xb4] = 0x2524, [0xb5] = 0x2561, [0xb6] = 0x2562, [0xb7] = 0x2556,
  [0xb8] = 0x2555, [0xb9] = 0x2563, [0xba] = 0x2551, [0xbb] = 0x2557,
  [0xbc] = 0x255d, [0xbd] = 0x255c, [0xbe] = 0x255b, [0xbf] = 0x2510,
  [0xc0] = 0x2514, [0xc1] = 0x2534, [0xc2] = 0x252c, [0xc3] = 0x251c,
  [0xc4] = 0x2500, [0xc5] = 0x253c, [0xc6] = 0x255e, [0xc7] = 0x255f,
  [0xc8] = 0x255a, [0xc9] = 0x2554, [0xca] = 0x2569, [0xcb] = 0x2566,
  [0xcc] = 0x2560, [0xcd] = 0x2550, [0xce] = 0x256c, [0xcf] = 0x2567,
  [0xd0] = 0x2568, [0xd1] = 0x2564, [0xd2] = 0x2565, [0xd3] = 0x2559,
  [0xd4] = 0x2558, [0xd5] = 0x2552, [0xd6] = 0x2553, [0xd7] = 0x256b,
  [0xd8] = 0x256a, [0xd9] = 0x2518, [0xda] = 0x250c, [0xdb] = 0x2588,
  [0xdc] = 0x2584, [0xdd] = 0x258c, [0xde] = 0x2590, [0xdf] = 0x2580,
  [0xe0] = 0x03b1, [0xe1] = 0x00df, [0xe2] = 0x0393, [0xe3] = 0x03c0,
  [0xe4] = 0x03a3, [0xe5] = 0x03c3, [0xe6] = 0x00b5, [0xe7] = 0x03c4,
  [0xe8] = 0x03a6, [0xe9] = 0x0398, [0xea] = 0x03a9, [0xeb] = 0x03b4,
  [0xec] = 0x221e, [0xed] = 0x03c6, [0xee] = 0x03b5, [0xef] = 0x2229,
  [0xf0] = 0x2261, [0xf1] = 0x00b1, [0xf2] = 0x2265, [0xf3] = 0x2264,
  [0xf4] = 0x2320, [0xf5] = 0x2321, [0xf6] = 0x00f7, [0xf7] = 0x2248,
  [0xf8] = 0x00b0, [0xf9] = 0x2219, [0xfa] = 0x00b7, [0xfb] = 0x221a,
  [0xfc] = 0x207f, [0xfd] = 0x00b2, [0xfe] = 0x25a0, [0xff] = 0x00a0,
}
-- Reverse mapping: Unicode code point to CP437 byte value.
local cp437_encode = tableaux.invert(cp437_decode)

---Encode a Unicode code point to CP437
--
-- Returns nil if the code point cannot be found in CP437
--@param cp The Unicode code point as a number
--@return A string containing the related CP437 character
function cp437_enc(cp)
  if cp < 0x80 then
    -- Values below 0x80 are emitted unchanged as a single byte.
    return char(cp)
  else
    local bv = cp437_encode[cp]
    if bv == nil then
      return nil
    else
      return char(bv)
    end
  end
end

---Decodes a CP437 character
--@param buf A string containing the character
--@param pos The index in the string where the character begins
--@return pos The index in the string where the character ended
--@return cp The code point of the character as a number
function cp437_dec(buf, pos)
  pos = pos or 1
  local bv = byte(buf, pos)
  if bv < 0x80 then
    return pos + 1, bv
  else
    return pos + 1, cp437_decode[bv]
  end
end

---Helper function for the common case of UTF-16 to UTF-8 transcoding, such as
--from a Windows/SMB unicode string to a printable ASCII (subset of UTF-8)
--string.
--@param from A string in UTF-16, little-endian
--@return The string in UTF-8
function utf16to8(from)
  return transcode(from, utf16_dec, utf8_enc, false, nil)
end

---Helper function for the common case of UTF-8 to UTF-16 transcoding, such as
--from a printable ASCII (subset of UTF-8) string to a Windows/SMB unicode
--string.
--@param from A string in UTF-8
--@return The string in UTF-16, little-endian
function utf8to16(from)
  return transcode(from, utf8_dec, utf16_enc, nil, false)
end

if not unittest.testing() then
  return _ENV
end

test_suite = unittest.TestSuite:new()
test_suite:add_test(function()
    local pos, cp = utf8_dec("\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E")
    return pos == 4 and cp == 0x65E5, string.format("Expected 4, 0x65E5; got %d, 0x%x", pos, cp)
  end, "utf8_dec")
test_suite:add_test(function()
    local pos, err = utf8_dec("\x80\x80")
    return pos == nil and type(err) == "string", "Expected nil and an error string for a stray continuation byte"
  end, "utf8_dec stray continuation byte")
test_suite:add_test(function()
    local cps, err = decode("a\xff", utf8_dec)
    return cps == nil and type(err) == "string", "Expected nil and an error string for undecodable input"
  end, "decode reports decoder errors")
test_suite:add_test(unittest.equal(encode({0x65E5,0x672C,0x8A9E}, utf8_enc), "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"),"encode utf-8")
test_suite:add_test(unittest.equal(encode({0x12345,61,82,97}, utf16_enc), "\x08\xD8\x45\xDF=\0R\0a\0"),"encode utf-16")
test_suite:add_test(unittest.equal(encode({0x12345,61,82,97}, utf16_enc, true), "\xD8\x08\xDF\x45\0=\0R\0a"),"encode utf-16, big-endian")
test_suite:add_test(unittest.table_equal(decode("\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E", utf8_dec), {0x65E5,0x672C,0x8A9E}),"decode utf-8")
test_suite:add_test(unittest.table_equal(decode("\x08\xD8\x45\xDF=\0R\0a\0", utf16_dec), {0x12345,61,82,97}),"decode utf-16")
test_suite:add_test(unittest.table_equal(decode("\xD8\x08\xDF\x45\0=\0R\0a", utf16_dec, true), {0x12345,61,82,97}),"decode utf-16, big-endian")
test_suite:add_test(unittest.equal(utf16to8("\x08\xD8\x45\xDF=\0R\0a\0"), "\xF0\x92\x8D\x85=Ra"),"utf16to8")
test_suite:add_test(unittest.equal(utf8to16("\xF0\x92\x8D\x85=Ra"), "\x08\xD8\x45\xDF=\0R\0a\0"),"utf8to16")
test_suite:add_test(unittest.equal(encode({0x221e, 0x2248, 0x30}, cp437_enc), "\xec\xf70"), "encode cp437")
test_suite:add_test(unittest.table_equal(decode("\x81ber", cp437_dec), {0xfc, 0x62, 0x65, 0x72}), "decode cp437")
test_suite:add_test(unittest.equal(chardet("\x08\xD8\x45\xDF=\0R\0a\0"), 'utf-16le'), "detect utf-16le")
test_suite:add_test(unittest.equal(chardet("\xD8\x08\xDF\x45\0=\0R\0a"), 'utf-16be'), "detect utf-16be")
test_suite:add_test(unittest.equal(chardet("...\xF0\x92\x8D\x85=Ra"), 'utf-8'), "detect utf-8")
test_suite:add_test(unittest.equal(chardet("This sentence is completely normal."), 'ascii'), "detect ascii")
test_suite:add_test(unittest.equal(chardet('Comme ci, comme \xe7a'), 'other'), "detect other")
test_suite:add_test(unittest.equal(chardet('caf\xe9'), 'other'), "detect other, high byte last")

return _ENV