pugixml.cpp 330 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030
  1. /**
  2. * pugixml parser - version 1.12
  3. * --------------------------------------------------------
  4. * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  5. * Report bugs and download new versions at https://pugixml.org/
  6. *
  7. * This library is distributed under the MIT License. See notice at the end
  8. * of this file.
  9. *
  10. * This work is based on the pugxml parser, which is:
  11. * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
  12. */
  13. #ifndef SOURCE_PUGIXML_CPP
  14. #define SOURCE_PUGIXML_CPP
  15. #include "pugixml.hpp"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include <limits.h>
  21. #ifdef PUGIXML_WCHAR_MODE
  22. # include <wchar.h>
  23. #endif
  24. #ifndef PUGIXML_NO_XPATH
  25. # include <math.h>
  26. # include <float.h>
  27. #endif
  28. #ifndef PUGIXML_NO_STL
  29. # include <istream>
  30. # include <ostream>
  31. # include <string>
  32. #endif
  33. // For placement new
  34. #include <new>
  35. #ifdef _MSC_VER
  36. # pragma warning(push)
  37. # pragma warning(disable: 4127) // conditional expression is constant
  38. # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
  39. # pragma warning(disable: 4702) // unreachable code
  40. # pragma warning(disable: 4996) // this function or variable may be unsafe
  41. #endif
  42. #if defined(_MSC_VER) && defined(__c2__)
  43. # pragma clang diagnostic push
  44. # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
  45. #endif
  46. #ifdef __INTEL_COMPILER
  47. # pragma warning(disable: 177) // function was declared but never referenced
  48. # pragma warning(disable: 279) // controlling expression is constant
  49. # pragma warning(disable: 1478 1786) // function was declared "deprecated"
  50. # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
  51. #endif
  52. #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
  53. # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
  54. #endif
  55. #ifdef __BORLANDC__
  56. # pragma option push
  57. # pragma warn -8008 // condition is always false
  58. # pragma warn -8066 // unreachable code
  59. #endif
  60. #ifdef __SNC__
  61. // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
  62. # pragma diag_suppress=178 // function was declared but never referenced
  63. # pragma diag_suppress=237 // controlling expression is constant
  64. #endif
  65. #ifdef __TI_COMPILER_VERSION__
  66. # pragma diag_suppress 179 // function was declared but never referenced
  67. #endif
  68. // Inlining controls
  69. #if defined(_MSC_VER) && _MSC_VER >= 1300
  70. # define PUGI__NO_INLINE __declspec(noinline)
  71. #elif defined(__GNUC__)
  72. # define PUGI__NO_INLINE __attribute__((noinline))
  73. #else
  74. # define PUGI__NO_INLINE
  75. #endif
  76. // Branch weight controls
  77. #if defined(__GNUC__) && !defined(__c2__)
  78. # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
  79. #else
  80. # define PUGI__UNLIKELY(cond) (cond)
  81. #endif
  82. // Simple static assertion
  83. #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
  84. // Digital Mars C++ bug workaround for passing char loaded from memory via stack
  85. #ifdef __DMC__
  86. # define PUGI__DMC_VOLATILE volatile
  87. #else
  88. # define PUGI__DMC_VOLATILE
  89. #endif
  90. // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
  91. #if defined(__clang__) && defined(__has_attribute)
  92. # if __has_attribute(no_sanitize)
  93. # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
  94. # else
  95. # define PUGI__UNSIGNED_OVERFLOW
  96. # endif
  97. #else
  98. # define PUGI__UNSIGNED_OVERFLOW
  99. #endif
  100. // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
  101. #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
  102. using std::memcpy;
  103. using std::memmove;
  104. using std::memset;
  105. #endif
  106. // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
  107. #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
  108. # define LLONG_MIN (-LLONG_MAX - 1LL)
  109. # define LLONG_MAX __LONG_LONG_MAX__
  110. # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
  111. #endif
  112. // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
  113. #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE)
  114. # define PUGI__MSVC_CRT_VERSION _MSC_VER
  115. #elif defined(_WIN32_WCE)
  116. # define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1
  117. #endif
  118. // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
  119. #if __cplusplus >= 201103
  120. # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
  121. #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  122. # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
  123. #else
  124. # define PUGI__SNPRINTF sprintf
  125. #endif
  126. // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
  127. #ifdef PUGIXML_HEADER_ONLY
  128. # define PUGI__NS_BEGIN namespace pugi { namespace impl {
  129. # define PUGI__NS_END } }
  130. # define PUGI__FN inline
  131. # define PUGI__FN_NO_INLINE inline
  132. #else
  133. # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
  134. # define PUGI__NS_BEGIN namespace pugi { namespace impl {
  135. # define PUGI__NS_END } }
  136. # else
  137. # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
  138. # define PUGI__NS_END } } }
  139. # endif
  140. # define PUGI__FN
  141. # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
  142. #endif
  143. // uintptr_t
  144. #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
  145. namespace pugi
  146. {
  147. # ifndef _UINTPTR_T_DEFINED
  148. typedef size_t uintptr_t;
  149. # endif
  150. typedef unsigned __int8 uint8_t;
  151. typedef unsigned __int16 uint16_t;
  152. typedef unsigned __int32 uint32_t;
  153. }
  154. #else
  155. # include <stdint.h>
  156. #endif
  157. // Memory allocation
  158. PUGI__NS_BEGIN
  159. PUGI__FN void* default_allocate(size_t size)
  160. {
  161. return malloc(size);
  162. }
  163. PUGI__FN void default_deallocate(void* ptr)
  164. {
  165. free(ptr);
  166. }
  167. template <typename T>
  168. struct xml_memory_management_function_storage
  169. {
  170. static allocation_function allocate;
  171. static deallocation_function deallocate;
  172. };
  173. // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
  174. // Without a template<> we'll get multiple definitions of the same static
  175. template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
  176. template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
  177. typedef xml_memory_management_function_storage<int> xml_memory;
  178. PUGI__NS_END
  179. // String utilities
  180. PUGI__NS_BEGIN
  181. // Get string length
  182. PUGI__FN size_t strlength(const char_t* s)
  183. {
  184. assert(s);
  185. #ifdef PUGIXML_WCHAR_MODE
  186. return wcslen(s);
  187. #else
  188. return strlen(s);
  189. #endif
  190. }
  191. // Compare two strings
  192. PUGI__FN bool strequal(const char_t* src, const char_t* dst)
  193. {
  194. assert(src && dst);
  195. #ifdef PUGIXML_WCHAR_MODE
  196. return wcscmp(src, dst) == 0;
  197. #else
  198. return strcmp(src, dst) == 0;
  199. #endif
  200. }
  201. // Compare lhs with [rhs_begin, rhs_end)
  202. PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
  203. {
  204. for (size_t i = 0; i < count; ++i)
  205. if (lhs[i] != rhs[i])
  206. return false;
  207. return lhs[count] == 0;
  208. }
  209. // Get length of wide string, even if CRT lacks wide character support
  210. PUGI__FN size_t strlength_wide(const wchar_t* s)
  211. {
  212. assert(s);
  213. #ifdef PUGIXML_WCHAR_MODE
  214. return wcslen(s);
  215. #else
  216. const wchar_t* end = s;
  217. while (*end) end++;
  218. return static_cast<size_t>(end - s);
  219. #endif
  220. }
  221. PUGI__NS_END
  222. // auto_ptr-like object for exception recovery
  223. PUGI__NS_BEGIN
  // Guard object that holds a raw pointer together with the function used to dispose of it.
  // Unless release() is called first, the destructor passes the pointer to the deleter.
  template <typename T> struct auto_deleter
  {
      // Signature of the disposal function
      typedef void (*D)(T*);

      T* data;
      D deleter;

      // Stores the pointer and the function to call on it at destruction
      auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
      {
      }

      // Calls the deleter on the held pointer, if it is still non-null
      ~auto_deleter()
      {
          if (data != 0)
              deleter(data);
      }

      // Returns the held pointer and clears it, so the destructor will not call the deleter
      T* release()
      {
          T* const owned = data;

          data = 0;

          return owned;
      }
  };
  243. PUGI__NS_END
  244. #ifdef PUGIXML_COMPACT
  245. PUGI__NS_BEGIN
  246. class compact_hash_table
  247. {
  248. public:
  249. compact_hash_table(): _items(0), _capacity(0), _count(0)
  250. {
  251. }
  252. void clear()
  253. {
  254. if (_items)
  255. {
  256. xml_memory::deallocate(_items);
  257. _items = 0;
  258. _capacity = 0;
  259. _count = 0;
  260. }
  261. }
  262. void* find(const void* key)
  263. {
  264. if (_capacity == 0) return 0;
  265. item_t* item = get_item(key);
  266. assert(item);
  267. assert(item->key == key || (item->key == 0 && item->value == 0));
  268. return item->value;
  269. }
  270. void insert(const void* key, void* value)
  271. {
  272. assert(_capacity != 0 && _count < _capacity - _capacity / 4);
  273. item_t* item = get_item(key);
  274. assert(item);
  275. if (item->key == 0)
  276. {
  277. _count++;
  278. item->key = key;
  279. }
  280. item->value = value;
  281. }
  282. bool reserve(size_t extra = 16)
  283. {
  284. if (_count + extra >= _capacity - _capacity / 4)
  285. return rehash(_count + extra);
  286. return true;
  287. }
  288. private:
  289. struct item_t
  290. {
  291. const void* key;
  292. void* value;
  293. };
  294. item_t* _items;
  295. size_t _capacity;
  296. size_t _count;
  297. bool rehash(size_t count);
  298. item_t* get_item(const void* key)
  299. {
  300. assert(key);
  301. assert(_capacity > 0);
  302. size_t hashmod = _capacity - 1;
  303. size_t bucket = hash(key) & hashmod;
  304. for (size_t probe = 0; probe <= hashmod; ++probe)
  305. {
  306. item_t& probe_item = _items[bucket];
  307. if (probe_item.key == key || probe_item.key == 0)
  308. return &probe_item;
  309. // hash collision, quadratic probing
  310. bucket = (bucket + probe + 1) & hashmod;
  311. }
  312. assert(false && "Hash table is full"); // unreachable
  313. return 0;
  314. }
  315. static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
  316. {
  317. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
  318. // MurmurHash3 32-bit finalizer
  319. h ^= h >> 16;
  320. h *= 0x85ebca6bu;
  321. h ^= h >> 13;
  322. h *= 0xc2b2ae35u;
  323. h ^= h >> 16;
  324. return h;
  325. }
  326. };
  327. PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
  328. {
  329. size_t capacity = 32;
  330. while (count >= capacity - capacity / 4)
  331. capacity *= 2;
  332. compact_hash_table rt;
  333. rt._capacity = capacity;
  334. rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
  335. if (!rt._items)
  336. return false;
  337. memset(rt._items, 0, sizeof(item_t) * capacity);
  338. for (size_t i = 0; i < _capacity; ++i)
  339. if (_items[i].key)
  340. rt.insert(_items[i].key, _items[i].value);
  341. if (_items)
  342. xml_memory::deallocate(_items);
  343. _capacity = capacity;
  344. _items = rt._items;
  345. assert(_count == rt._count);
  346. return true;
  347. }
  348. PUGI__NS_END
  349. #endif
  350. PUGI__NS_BEGIN
  // granularity used when rounding and encoding allocation sizes and offsets
  #ifdef PUGIXML_COMPACT
  static const uintptr_t xml_memory_block_alignment = 4;
  #else
  static const uintptr_t xml_memory_block_alignment = sizeof(void*);
  #endif

  // extra metadata bits
  static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
  static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
  static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
  static const uintptr_t xml_memory_page_type_mask = 0x0f;

  // combined masks for string uniqueness
  static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_name_allocated_mask;
  static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_value_allocated_mask;
  #ifdef PUGIXML_COMPACT
  // compact mode: the header object finds its page through its own get_page() member
  #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
  #define PUGI__GETPAGE_IMPL(header) (header).get_page()
  #else
  // header value: byte distance from the page to the object shifted left by 8, combined with the flags
  #define PUGI__GETHEADER_IMPL(object, page, flags) \
      (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))

  // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  #define PUGI__GETPAGE_IMPL(header) \
      static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>( \
          reinterpret_cast<const char*>(&header) - (header >> 8))))
  #endif

  // page that owns node/attribute n
  #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)

  // node type stored in the type bits of n's header
  #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
  struct xml_allocator;

  // Bookkeeping header placed at the start of every memory page; the allocator
  // hands out the bytes that follow this header
  struct xml_memory_page
  {
      // Turn raw memory into a page header with every field zeroed
      static xml_memory_page* construct(void* memory)
      {
          xml_memory_page* page = static_cast<xml_memory_page*>(memory);

          page->allocator = 0;

          page->prev = 0;
          page->next = 0;

          page->busy_size = 0;
          page->freed_size = 0;

  #ifdef PUGIXML_COMPACT
          page->compact_string_base = 0;
          page->compact_shared_parent = 0;
          page->compact_page_marker = 0;
  #endif

          return page;
      }

      xml_allocator* allocator;

      // neighbours in the allocator's page list
      xml_memory_page* prev;
      xml_memory_page* next;

      size_t busy_size;  // bytes handed out from this page
      size_t freed_size; // bytes returned to this page

  #ifdef PUGIXML_COMPACT
      char_t* compact_string_base;
      void* compact_shared_parent;
      uint32_t* compact_page_marker;
  #endif
  };
  403. static const size_t xml_memory_page_size =
  404. #ifdef PUGIXML_MEMORY_PAGE_SIZE
  405. (PUGIXML_MEMORY_PAGE_SIZE)
  406. #else
  407. 32768
  408. #endif
  409. - sizeof(xml_memory_page);
  410. struct xml_memory_string_header
  411. {
  412. uint16_t page_offset; // offset from page->data
  413. uint16_t full_size; // 0 if string occupies whole page
  414. };
  415. struct xml_allocator
  416. {
  417. xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
  418. {
  419. #ifdef PUGIXML_COMPACT
  420. _hash = 0;
  421. #endif
  422. }
  423. xml_memory_page* allocate_page(size_t data_size)
  424. {
  425. size_t size = sizeof(xml_memory_page) + data_size;
  426. // allocate block with some alignment, leaving memory for worst-case padding
  427. void* memory = xml_memory::allocate(size);
  428. if (!memory) return 0;
  429. // prepare page structure
  430. xml_memory_page* page = xml_memory_page::construct(memory);
  431. assert(page);
  432. assert(this == _root->allocator);
  433. page->allocator = this;
  434. return page;
  435. }
  436. static void deallocate_page(xml_memory_page* page)
  437. {
  438. xml_memory::deallocate(page);
  439. }
  440. void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
  441. void* allocate_memory(size_t size, xml_memory_page*& out_page)
  442. {
  443. if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
  444. return allocate_memory_oob(size, out_page);
  445. void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
  446. _busy_size += size;
  447. out_page = _root;
  448. return buf;
  449. }
  450. #ifdef PUGIXML_COMPACT
  451. void* allocate_object(size_t size, xml_memory_page*& out_page)
  452. {
  453. void* result = allocate_memory(size + sizeof(uint32_t), out_page);
  454. if (!result) return 0;
  455. // adjust for marker
  456. ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
  457. if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
  458. {
  459. // insert new marker
  460. uint32_t* marker = static_cast<uint32_t*>(result);
  461. *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
  462. out_page->compact_page_marker = marker;
  463. // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
  464. // this will make sure deallocate_memory correctly tracks the size
  465. out_page->freed_size += sizeof(uint32_t);
  466. return marker + 1;
  467. }
  468. else
  469. {
  470. // roll back uint32_t part
  471. _busy_size -= sizeof(uint32_t);
  472. return result;
  473. }
  474. }
  475. #else
  476. void* allocate_object(size_t size, xml_memory_page*& out_page)
  477. {
  478. return allocate_memory(size, out_page);
  479. }
  480. #endif
  481. void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
  482. {
  483. if (page == _root) page->busy_size = _busy_size;
  484. assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
  485. (void)!ptr;
  486. page->freed_size += size;
  487. assert(page->freed_size <= page->busy_size);
  488. if (page->freed_size == page->busy_size)
  489. {
  490. if (page->next == 0)
  491. {
  492. assert(_root == page);
  493. // top page freed, just reset sizes
  494. page->busy_size = 0;
  495. page->freed_size = 0;
  496. #ifdef PUGIXML_COMPACT
  497. // reset compact state to maximize efficiency
  498. page->compact_string_base = 0;
  499. page->compact_shared_parent = 0;
  500. page->compact_page_marker = 0;
  501. #endif
  502. _busy_size = 0;
  503. }
  504. else
  505. {
  506. assert(_root != page);
  507. assert(page->prev);
  508. // remove from the list
  509. page->prev->next = page->next;
  510. page->next->prev = page->prev;
  511. // deallocate
  512. deallocate_page(page);
  513. }
  514. }
  515. }
  516. char_t* allocate_string(size_t length)
  517. {
  518. static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
  519. PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
  520. // allocate memory for string and header block
  521. size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
  522. // round size up to block alignment boundary
  523. size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
  524. xml_memory_page* page;
  525. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
  526. if (!header) return 0;
  527. // setup header
  528. ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
  529. assert(page_offset % xml_memory_block_alignment == 0);
  530. assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
  531. header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
  532. // full_size == 0 for large strings that occupy the whole page
  533. assert(full_size % xml_memory_block_alignment == 0);
  534. assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
  535. header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
  536. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  537. // header is guaranteed a pointer-sized alignment, which should be enough for char_t
  538. return static_cast<char_t*>(static_cast<void*>(header + 1));
  539. }
  540. void deallocate_string(char_t* string)
  541. {
  542. // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  543. // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
  544. // get header
  545. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
  546. assert(header);
  547. // deallocate
  548. size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
  549. xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
  550. // if full_size == 0 then this string occupies the whole page
  551. size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
  552. deallocate_memory(header, full_size, page);
  553. }
  554. bool reserve()
  555. {
  556. #ifdef PUGIXML_COMPACT
  557. return _hash->reserve();
  558. #else
  559. return true;
  560. #endif
  561. }
  562. xml_memory_page* _root;
  563. size_t _busy_size;
  564. #ifdef PUGIXML_COMPACT
  565. compact_hash_table* _hash;
  566. #endif
  567. };
  568. PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
  569. {
  570. const size_t large_allocation_threshold = xml_memory_page_size / 4;
  571. xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
  572. out_page = page;
  573. if (!page) return 0;
  574. if (size <= large_allocation_threshold)
  575. {
  576. _root->busy_size = _busy_size;
  577. // insert page at the end of linked list
  578. page->prev = _root;
  579. _root->next = page;
  580. _root = page;
  581. _busy_size = size;
  582. }
  583. else
  584. {
  585. // insert page before the end of linked list, so that it is deleted as soon as possible
  586. // the last page is not deleted even if it's empty (see deallocate_memory)
  587. assert(_root->prev);
  588. page->prev = _root->prev;
  589. page->next = _root;
  590. _root->prev->next = page;
  591. _root->prev = page;
  592. page->busy_size = size;
  593. }
  594. return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
  595. }
  596. PUGI__NS_END
  597. #ifdef PUGIXML_COMPACT
  598. PUGI__NS_BEGIN
  599. static const uintptr_t compact_alignment_log2 = 2;
  600. static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
  601. class compact_header
  602. {
  603. public:
  604. compact_header(xml_memory_page* page, unsigned int flags)
  605. {
  606. PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
  607. ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
  608. assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
  609. _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
  610. _flags = static_cast<unsigned char>(flags);
  611. }
  612. void operator&=(uintptr_t mod)
  613. {
  614. _flags &= static_cast<unsigned char>(mod);
  615. }
  616. void operator|=(uintptr_t mod)
  617. {
  618. _flags |= static_cast<unsigned char>(mod);
  619. }
  620. uintptr_t operator&(uintptr_t mod) const
  621. {
  622. return _flags & mod;
  623. }
  624. xml_memory_page* get_page() const
  625. {
  626. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  627. const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
  628. const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
  629. return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
  630. }
  631. private:
  632. unsigned char _page;
  633. unsigned char _flags;
  634. };
  635. PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
  636. {
  637. const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
  638. return header->get_page();
  639. }
  640. template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
  641. {
  642. return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
  643. }
  644. template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
  645. {
  646. compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
  647. }
  648. template <typename T, int header_offset, int start = -126> class compact_pointer
  649. {
  650. public:
  651. compact_pointer(): _data(0)
  652. {
  653. }
  654. void operator=(const compact_pointer& rhs)
  655. {
  656. *this = rhs + 0;
  657. }
  658. void operator=(T* value)
  659. {
  660. if (value)
  661. {
  662. // value is guaranteed to be compact-aligned; 'this' is not
  663. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  664. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  665. // compensate for arithmetic shift rounding for negative values
  666. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  667. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
  668. if (static_cast<uintptr_t>(offset) <= 253)
  669. _data = static_cast<unsigned char>(offset + 1);
  670. else
  671. {
  672. compact_set_value<header_offset>(this, value);
  673. _data = 255;
  674. }
  675. }
  676. else
  677. _data = 0;
  678. }
  679. operator T*() const
  680. {
  681. if (_data)
  682. {
  683. if (_data < 255)
  684. {
  685. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  686. return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
  687. }
  688. else
  689. return compact_get_value<header_offset, T>(this);
  690. }
  691. else
  692. return 0;
  693. }
  694. T* operator->() const
  695. {
  696. return *this;
  697. }
  698. private:
  699. unsigned char _data;
  700. };
  701. template <typename T, int header_offset> class compact_pointer_parent
  702. {
  703. public:
  704. compact_pointer_parent(): _data(0)
  705. {
  706. }
  707. void operator=(const compact_pointer_parent& rhs)
  708. {
  709. *this = rhs + 0;
  710. }
  711. void operator=(T* value)
  712. {
  713. if (value)
  714. {
  715. // value is guaranteed to be compact-aligned; 'this' is not
  716. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  717. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  718. // compensate for arithmetic shift behavior for negative values
  719. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  720. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
  721. if (static_cast<uintptr_t>(offset) <= 65533)
  722. {
  723. _data = static_cast<unsigned short>(offset + 1);
  724. }
  725. else
  726. {
  727. xml_memory_page* page = compact_get_page(this, header_offset);
  728. if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
  729. page->compact_shared_parent = value;
  730. if (page->compact_shared_parent == value)
  731. {
  732. _data = 65534;
  733. }
  734. else
  735. {
  736. compact_set_value<header_offset>(this, value);
  737. _data = 65535;
  738. }
  739. }
  740. }
  741. else
  742. {
  743. _data = 0;
  744. }
  745. }
  746. operator T*() const
  747. {
  748. if (_data)
  749. {
  750. if (_data < 65534)
  751. {
  752. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  753. return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
  754. }
  755. else if (_data == 65534)
  756. return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
  757. else
  758. return compact_get_value<header_offset, T>(this);
  759. }
  760. else
  761. return 0;
  762. }
  763. T* operator->() const
  764. {
  765. return *this;
  766. }
  767. private:
  768. uint16_t _data;
  769. };
  770. template <int header_offset, int base_offset> class compact_string
  771. {
  772. public:
  773. compact_string(): _data(0)
  774. {
  775. }
  776. void operator=(const compact_string& rhs)
  777. {
  778. *this = rhs + 0;
  779. }
  780. void operator=(char_t* value)
  781. {
  782. if (value)
  783. {
  784. xml_memory_page* page = compact_get_page(this, header_offset);
  785. if (PUGI__UNLIKELY(page->compact_string_base == 0))
  786. page->compact_string_base = value;
  787. ptrdiff_t offset = value - page->compact_string_base;
  788. if (static_cast<uintptr_t>(offset) < (65535 << 7))
  789. {
  790. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  791. uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
  792. if (*base == 0)
  793. {
  794. *base = static_cast<uint16_t>((offset >> 7) + 1);
  795. _data = static_cast<unsigned char>((offset & 127) + 1);
  796. }
  797. else
  798. {
  799. ptrdiff_t remainder = offset - ((*base - 1) << 7);
  800. if (static_cast<uintptr_t>(remainder) <= 253)
  801. {
  802. _data = static_cast<unsigned char>(remainder + 1);
  803. }
  804. else
  805. {
  806. compact_set_value<header_offset>(this, value);
  807. _data = 255;
  808. }
  809. }
  810. }
  811. else
  812. {
  813. compact_set_value<header_offset>(this, value);
  814. _data = 255;
  815. }
  816. }
  817. else
  818. {
  819. _data = 0;
  820. }
  821. }
  822. operator char_t*() const
  823. {
  824. if (_data)
  825. {
  826. if (_data < 255)
  827. {
  828. xml_memory_page* page = compact_get_page(this, header_offset);
  829. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  830. const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
  831. assert(*base);
  832. ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
  833. return page->compact_string_base + offset;
  834. }
  835. else
  836. {
  837. return compact_get_value<header_offset, char_t>(this);
  838. }
  839. }
  840. else
  841. return 0;
  842. }
  843. private:
  844. unsigned char _data;
  845. };
  846. PUGI__NS_END
  847. #endif
  848. #ifdef PUGIXML_COMPACT
  849. namespace pugi
  850. {
  851. struct xml_attribute_struct
  852. {
  853. xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
  854. {
  855. PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
  856. }
  857. impl::compact_header header;
  858. uint16_t namevalue_base;
  859. impl::compact_string<4, 2> name;
  860. impl::compact_string<5, 3> value;
  861. impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
  862. impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
  863. };
  864. struct xml_node_struct
  865. {
  866. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
  867. {
  868. PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
  869. }
  870. impl::compact_header header;
  871. uint16_t namevalue_base;
  872. impl::compact_string<4, 2> name;
  873. impl::compact_string<5, 3> value;
  874. impl::compact_pointer_parent<xml_node_struct, 6> parent;
  875. impl::compact_pointer<xml_node_struct, 8, 0> first_child;
  876. impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
  877. impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
  878. impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
  879. };
  880. }
  881. #else
  882. namespace pugi
  883. {
  884. struct xml_attribute_struct
  885. {
  886. xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
  887. {
  888. header = PUGI__GETHEADER_IMPL(this, page, 0);
  889. }
  890. uintptr_t header;
  891. char_t* name;
  892. char_t* value;
  893. xml_attribute_struct* prev_attribute_c;
  894. xml_attribute_struct* next_attribute;
  895. };
  896. struct xml_node_struct
  897. {
  898. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
  899. {
  900. header = PUGI__GETHEADER_IMPL(this, page, type);
  901. }
  902. uintptr_t header;
  903. char_t* name;
  904. char_t* value;
  905. xml_node_struct* parent;
  906. xml_node_struct* first_child;
  907. xml_node_struct* prev_sibling_c;
  908. xml_node_struct* next_sibling;
  909. xml_attribute_struct* first_attribute;
  910. };
  911. }
  912. #endif
  913. PUGI__NS_BEGIN
  914. struct xml_extra_buffer
  915. {
  916. char_t* buffer;
  917. xml_extra_buffer* next;
  918. };
  919. struct xml_document_struct: public xml_node_struct, public xml_allocator
  920. {
  921. xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
  922. {
  923. }
  924. const char_t* buffer;
  925. xml_extra_buffer* extra_buffers;
  926. #ifdef PUGIXML_COMPACT
  927. compact_hash_table hash;
  928. #endif
  929. };
  930. template <typename Object> inline xml_allocator& get_allocator(const Object* object)
  931. {
  932. assert(object);
  933. return *PUGI__GETPAGE(object)->allocator;
  934. }
  935. template <typename Object> inline xml_document_struct& get_document(const Object* object)
  936. {
  937. assert(object);
  938. return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
  939. }
  940. PUGI__NS_END
  941. // Low-level DOM operations
  942. PUGI__NS_BEGIN
  943. inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
  944. {
  945. xml_memory_page* page;
  946. void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
  947. if (!memory) return 0;
  948. return new (memory) xml_attribute_struct(page);
  949. }
  950. inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
  951. {
  952. xml_memory_page* page;
  953. void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
  954. if (!memory) return 0;
  955. return new (memory) xml_node_struct(page, type);
  956. }
  957. inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
  958. {
  959. if (a->header & impl::xml_memory_page_name_allocated_mask)
  960. alloc.deallocate_string(a->name);
  961. if (a->header & impl::xml_memory_page_value_allocated_mask)
  962. alloc.deallocate_string(a->value);
  963. alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
  964. }
  965. inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
  966. {
  967. if (n->header & impl::xml_memory_page_name_allocated_mask)
  968. alloc.deallocate_string(n->name);
  969. if (n->header & impl::xml_memory_page_value_allocated_mask)
  970. alloc.deallocate_string(n->value);
  971. for (xml_attribute_struct* attr = n->first_attribute; attr; )
  972. {
  973. xml_attribute_struct* next = attr->next_attribute;
  974. destroy_attribute(attr, alloc);
  975. attr = next;
  976. }
  977. for (xml_node_struct* child = n->first_child; child; )
  978. {
  979. xml_node_struct* next = child->next_sibling;
  980. destroy_node(child, alloc);
  981. child = next;
  982. }
  983. alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
  984. }
  985. inline void append_node(xml_node_struct* child, xml_node_struct* node)
  986. {
  987. child->parent = node;
  988. xml_node_struct* head = node->first_child;
  989. if (head)
  990. {
  991. xml_node_struct* tail = head->prev_sibling_c;
  992. tail->next_sibling = child;
  993. child->prev_sibling_c = tail;
  994. head->prev_sibling_c = child;
  995. }
  996. else
  997. {
  998. node->first_child = child;
  999. child->prev_sibling_c = child;
  1000. }
  1001. }
  1002. inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
  1003. {
  1004. child->parent = node;
  1005. xml_node_struct* head = node->first_child;
  1006. if (head)
  1007. {
  1008. child->prev_sibling_c = head->prev_sibling_c;
  1009. head->prev_sibling_c = child;
  1010. }
  1011. else
  1012. child->prev_sibling_c = child;
  1013. child->next_sibling = head;
  1014. node->first_child = child;
  1015. }
  1016. inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
  1017. {
  1018. xml_node_struct* parent = node->parent;
  1019. child->parent = parent;
  1020. if (node->next_sibling)
  1021. node->next_sibling->prev_sibling_c = child;
  1022. else
  1023. parent->first_child->prev_sibling_c = child;
  1024. child->next_sibling = node->next_sibling;
  1025. child->prev_sibling_c = node;
  1026. node->next_sibling = child;
  1027. }
  1028. inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
  1029. {
  1030. xml_node_struct* parent = node->parent;
  1031. child->parent = parent;
  1032. if (node->prev_sibling_c->next_sibling)
  1033. node->prev_sibling_c->next_sibling = child;
  1034. else
  1035. parent->first_child = child;
  1036. child->prev_sibling_c = node->prev_sibling_c;
  1037. child->next_sibling = node;
  1038. node->prev_sibling_c = child;
  1039. }
  1040. inline void remove_node(xml_node_struct* node)
  1041. {
  1042. xml_node_struct* parent = node->parent;
  1043. if (node->next_sibling)
  1044. node->next_sibling->prev_sibling_c = node->prev_sibling_c;
  1045. else
  1046. parent->first_child->prev_sibling_c = node->prev_sibling_c;
  1047. if (node->prev_sibling_c->next_sibling)
  1048. node->prev_sibling_c->next_sibling = node->next_sibling;
  1049. else
  1050. parent->first_child = node->next_sibling;
  1051. node->parent = 0;
  1052. node->prev_sibling_c = 0;
  1053. node->next_sibling = 0;
  1054. }
  1055. inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1056. {
  1057. xml_attribute_struct* head = node->first_attribute;
  1058. if (head)
  1059. {
  1060. xml_attribute_struct* tail = head->prev_attribute_c;
  1061. tail->next_attribute = attr;
  1062. attr->prev_attribute_c = tail;
  1063. head->prev_attribute_c = attr;
  1064. }
  1065. else
  1066. {
  1067. node->first_attribute = attr;
  1068. attr->prev_attribute_c = attr;
  1069. }
  1070. }
  1071. inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1072. {
  1073. xml_attribute_struct* head = node->first_attribute;
  1074. if (head)
  1075. {
  1076. attr->prev_attribute_c = head->prev_attribute_c;
  1077. head->prev_attribute_c = attr;
  1078. }
  1079. else
  1080. attr->prev_attribute_c = attr;
  1081. attr->next_attribute = head;
  1082. node->first_attribute = attr;
  1083. }
  1084. inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1085. {
  1086. if (place->next_attribute)
  1087. place->next_attribute->prev_attribute_c = attr;
  1088. else
  1089. node->first_attribute->prev_attribute_c = attr;
  1090. attr->next_attribute = place->next_attribute;
  1091. attr->prev_attribute_c = place;
  1092. place->next_attribute = attr;
  1093. }
  1094. inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1095. {
  1096. if (place->prev_attribute_c->next_attribute)
  1097. place->prev_attribute_c->next_attribute = attr;
  1098. else
  1099. node->first_attribute = attr;
  1100. attr->prev_attribute_c = place->prev_attribute_c;
  1101. attr->next_attribute = place;
  1102. place->prev_attribute_c = attr;
  1103. }
  1104. inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1105. {
  1106. if (attr->next_attribute)
  1107. attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
  1108. else
  1109. node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
  1110. if (attr->prev_attribute_c->next_attribute)
  1111. attr->prev_attribute_c->next_attribute = attr->next_attribute;
  1112. else
  1113. node->first_attribute = attr->next_attribute;
  1114. attr->prev_attribute_c = 0;
  1115. attr->next_attribute = 0;
  1116. }
  1117. PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
  1118. {
  1119. if (!alloc.reserve()) return 0;
  1120. xml_node_struct* child = allocate_node(alloc, type);
  1121. if (!child) return 0;
  1122. append_node(child, node);
  1123. return child;
  1124. }
  1125. PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
  1126. {
  1127. if (!alloc.reserve()) return 0;
  1128. xml_attribute_struct* attr = allocate_attribute(alloc);
  1129. if (!attr) return 0;
  1130. append_attribute(attr, node);
  1131. return attr;
  1132. }
  1133. PUGI__NS_END
  1134. // Helper classes for code generation
  1135. PUGI__NS_BEGIN
  // Compile-time flag type whose value is 0 (used as a template argument).
  struct opt_false
  {
      enum
      {
          value = 0
      };
  };
  // Compile-time flag type whose value is 1 (used as a template argument).
  struct opt_true
  {
      enum
      {
          value = 1
      };
  };
  1144. PUGI__NS_END
  1145. // Unicode utilities
  1146. PUGI__NS_BEGIN
  // Returns value with its two bytes exchanged.
  inline uint16_t endian_swap(uint16_t value)
  {
      const unsigned int low_byte = value & 0xff;
      const unsigned int high_byte = value >> 8;

      return static_cast<uint16_t>((low_byte << 8) | high_byte);
  }
  // Returns value with its four bytes in reverse order.
  inline uint32_t endian_swap(uint32_t value)
  {
      const uint32_t byte0_to_top = (value & 0xff) << 24;
      const uint32_t byte1_up = (value & 0xff00) << 8;
      const uint32_t byte2_down = (value & 0xff0000) >> 8;
      const uint32_t byte3_to_bottom = value >> 24;

      return byte0_to_top | byte1_up | byte2_down | byte3_to_bottom;
  }
  // Output traits that count UTF-8 code units instead of writing them.
  struct utf8_counter
  {
      typedef size_t value_type;

      // Adds the UTF-8 length of a code point below U+10000 to result.
      static value_type low(value_type result, uint32_t ch)
      {
          // U+0000..U+007F -> 1 unit, U+0080..U+07FF -> 2 units, U+0800..U+FFFF -> 3 units
          const value_type units = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

          return result + units;
      }

      // Adds the UTF-8 length of a code point in U+10000..U+10FFFF (always 4 units) to result.
      static value_type high(value_type result, uint32_t)
      {
          return result + 4;
      }
  };
  // Output traits that encode code points as UTF-8 into a byte buffer.
  // Every function returns the position just past the bytes it wrote.
  struct utf8_writer
  {
      typedef uint8_t* value_type;

      // Encodes a code point below U+10000 using 1, 2 or 3 bytes.
      static value_type low(value_type result, uint32_t ch)
      {
          // U+0800..U+FFFF
          if (ch >= 0x800)
          {
              *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
              *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
              *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
              return result;
          }

          // U+0080..U+07FF
          if (ch >= 0x80)
          {
              *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
              *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
              return result;
          }

          // U+0000..U+007F
          *result++ = static_cast<uint8_t>(ch);
          return result;
      }

      // Encodes a code point in U+10000..U+10FFFF using 4 bytes.
      static value_type high(value_type result, uint32_t ch)
      {
          *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
          *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
          *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
          *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
          return result;
      }

      // Encodes any code point by dispatching on the U+10000 boundary.
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);
          else
              return high(result, ch);
      }
  };
  // Output traits that count UTF-16 code units instead of writing them.
  struct utf16_counter
  {
      typedef size_t value_type;

      // Code points below U+10000 take one UTF-16 unit.
      static value_type low(value_type result, uint32_t)
      {
          const value_type units = 1;
          return result + units;
      }

      // Code points from U+10000 up take two UTF-16 units (a surrogate pair).
      static value_type high(value_type result, uint32_t)
      {
          const value_type units = 2;
          return result + units;
      }
  };
  // Output traits that encode code points as UTF-16 into a uint16_t buffer.
  // Every function returns the position just past the units it wrote.
  struct utf16_writer
  {
      typedef uint16_t* value_type;

      // Writes a code point below U+10000 as a single unit.
      static value_type low(value_type result, uint32_t ch)
      {
          result[0] = static_cast<uint16_t>(ch);
          return result + 1;
      }

      // Writes a code point from U+10000 up as a surrogate pair.
      static value_type high(value_type result, uint32_t ch)
      {
          const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

          // upper bits go into the lead surrogate, lower 10 bits into the trail surrogate
          result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
          result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));
          return result + 2;
      }

      // Writes any code point by dispatching on the U+10000 boundary.
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);
          else
              return high(result, ch);
      }
  };
  // Output traits that count UTF-32 code units; every code point is exactly one unit.
  struct utf32_counter
  {
      typedef size_t value_type;

      static value_type low(value_type result, uint32_t)
      {
          const value_type units = 1;
          return result + units;
      }

      static value_type high(value_type result, uint32_t)
      {
          const value_type units = 1;
          return result + units;
      }
  };
  // Output traits that store code points unchanged into a uint32_t buffer.
  // Every function writes one unit and returns the position just past it.
  struct utf32_writer
  {
      typedef uint32_t* value_type;

      static value_type low(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }

      static value_type high(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }

      static value_type any(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }
  };
  // Output traits that write one byte per code point; values above 255 become '?'.
  struct latin1_writer
  {
      typedef uint8_t* value_type;

      // Writes ch as a single byte, substituting '?' when it does not fit in 8 bits.
      static value_type low(value_type result, uint32_t ch)
      {
          if (ch > 255)
              *result = static_cast<uint8_t>('?');
          else
              *result = static_cast<uint8_t>(ch);

          return result + 1;
      }

      // Code points passed here are always written as '?'.
      static value_type high(value_type result, uint32_t ch)
      {
          (void)ch;

          *result = '?';

          return result + 1;
      }
  };
  // Decodes a UTF-8 byte sequence, feeding each code point to Traits::low (below U+10000)
  // or Traits::high (4-byte sequences). Bytes that do not start a complete sequence with
  // valid continuation bytes are skipped one at a time.
  struct utf8_decoder
  {
      typedef uint8_t type;

      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          const uint8_t payload_mask = 0x3f;

          while (size)
          {
              const uint8_t lead = *data;

              // 0xxxxxxx -> U+0000..U+007F
              if (lead < 0x80)
              {
                  result = Traits::low(result, lead);
                  ++data;
                  --size;

                  // the block path below reads 4 bytes as one uint32_t, so it is only taken on a 4-byte boundary
                  if ((reinterpret_cast<uintptr_t>(data) & 3) != 0) continue;

                  // consume runs of four ASCII bytes at once (no byte has its top bit set);
                  // the cast goes through void* to silence 'cast increases required alignment of target type' warnings
                  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
                  {
                      result = Traits::low(result, data[0]);
                      result = Traits::low(result, data[1]);
                      result = Traits::low(result, data[2]);
                      result = Traits::low(result, data[3]);
                      data += 4;
                      size -= 4;
                  }

                  continue;
              }

              // 110xxxxx -> U+0080..U+07FF
              if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
              {
                  result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & payload_mask));
                  data += 2;
                  size -= 2;
                  continue;
              }

              // 1110xxxx -> U+0800-U+FFFF
              if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
              {
                  result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & payload_mask) << 6) | (data[2] & payload_mask));
                  data += 3;
                  size -= 3;
                  continue;
              }

              // 11110xxx -> U+10000..U+10FFFF
              if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
              {
                  result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & payload_mask) << 12) | ((data[2] & payload_mask) << 6) | (data[3] & payload_mask));
                  data += 4;
                  size -= 4;
                  continue;
              }

              // 10xxxxxx or 11111xxx, or a truncated/malformed sequence -> skip a single byte
              ++data;
              --size;
          }

          return result;
      }
  };
  1354. template <typename opt_swap> struct utf16_decoder
  1355. {
  1356. typedef uint16_t type;
  1357. template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
  1358. {
  1359. while (size)
  1360. {
  1361. uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1362. // U+0000..U+D7FF
  1363. if (lead < 0xD800)
  1364. {
  1365. result = Traits::low(result, lead);
  1366. data += 1;
  1367. size -= 1;
  1368. }
  1369. // U+E000..U+FFFF
  1370. else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
  1371. {
  1372. result = Traits::low(result, lead);
  1373. data += 1;
  1374. size -= 1;
  1375. }
  1376. // surrogate pair lead
  1377. else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
  1378. {
  1379. uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
  1380. if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
  1381. {
  1382. result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
  1383. data += 2;
  1384. size -= 2;
  1385. }
  1386. else
  1387. {
  1388. data += 1;
  1389. size -= 1;
  1390. }
  1391. }
  1392. else
  1393. {
  1394. data += 1;
  1395. size -= 1;
  1396. }
  1397. }
  1398. return result;
  1399. }
  1400. };
  1401. template <typename opt_swap> struct utf32_decoder
  1402. {
  1403. typedef uint32_t type;
  1404. template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
  1405. {
  1406. while (size)
  1407. {
  1408. uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1409. // U+0000..U+FFFF
  1410. if (lead < 0x10000)
  1411. {
  1412. result = Traits::low(result, lead);
  1413. data += 1;
  1414. size -= 1;
  1415. }
  1416. // U+10000..U+10FFFF
  1417. else
  1418. {
  1419. result = Traits::high(result, lead);
  1420. data += 1;
  1421. size -= 1;
  1422. }
  1423. }
  1424. return result;
  1425. }
  1426. };
  // Decodes a byte sequence in which every byte is passed to Traits::low as one code point.
  struct latin1_decoder
  {
      typedef uint8_t type;

      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          for (size_t i = 0; i < size; ++i)
              result = Traits::low(result, data[i]);

          return result;
      }
  };
  1441. template <size_t size> struct wchar_selector;
  1442. template <> struct wchar_selector<2>
  1443. {
  1444. typedef uint16_t type;
  1445. typedef utf16_counter counter;
  1446. typedef utf16_writer writer;
  1447. typedef utf16_decoder<opt_false> decoder;
  1448. };
  1449. template <> struct wchar_selector<4>
  1450. {
  1451. typedef uint32_t type;
  1452. typedef utf32_counter counter;
  1453. typedef utf32_writer writer;
  1454. typedef utf32_decoder<opt_false> decoder;
  1455. };
  1456. typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
  1457. typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
  1458. struct wchar_decoder
  1459. {
  1460. typedef wchar_t type;
  1461. template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
  1462. {
  1463. typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
  1464. return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
  1465. }
  1466. };
  1467. #ifdef PUGIXML_WCHAR_MODE
  1468. PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
  1469. {
  1470. for (size_t i = 0; i < length; ++i)
  1471. result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
  1472. }
  1473. #endif
  1474. PUGI__NS_END
  1475. PUGI__NS_BEGIN
  // Bit flags stored in chartype_table; each flag names the characters that carry it.
  enum chartype_t
  {
      ct_parse_pcdata = 1 << 0,   // \0, &, \r, <
      ct_parse_attr = 1 << 1,     // \0, &, \r, ', "
      ct_parse_attr_ws = 1 << 2,  // \0, &, \r, ', ", \n, tab
      ct_space = 1 << 3,          // \r, \n, space, tab
      ct_parse_cdata = 1 << 4,    // \0, ], >, \r
      ct_parse_comment = 1 << 5,  // \0, -, >, \r
      ct_symbol = 1 << 6,         // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
      ct_start_symbol = 1 << 7    // Any symbol > 127, a-z, A-Z, _, :
  };
  // Per-byte combination of chartype_t flags, indexed by character code (16 entries per row).
  static const unsigned char chartype_table[256] =
  {
      55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
      0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
      8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
      64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
      0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
      0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   0,   0,   0,   // 112-127

      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
  };
  // Bit flags stored in chartypex_table; each flag names the characters that carry it.
  enum chartypex_t
  {
      ctx_special_pcdata = 1 << 0,  // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
      ctx_special_attr = 1 << 1,    // Any symbol >= 0 and < 32, &, <, ", '
      ctx_start_symbol = 1 << 2,    // Any symbol > 127, a-z, A-Z, _
      ctx_digit = 1 << 3,           // 0-9
      ctx_symbol = 1 << 4           // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
  };
  // Per-byte combination of chartypex_t flags, indexed by character code (16 entries per row).
  static const unsigned char chartypex_table[256] =
  {
      3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,  // 0-15
      3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,  // 16-31
      0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0,  16, 16, 0,  // 32-47
      24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,  // 48-63

      0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 64-79
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20, // 80-95
      0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 96-111
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,  // 112-127

      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 128+
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
  };
  // Character classification: look up c in a 256-entry flag table and mask the entry with ct.
  // The result is non-zero when c carries at least one of the requested flags.
  #ifdef PUGIXML_WCHAR_MODE
  // wide-character build: codes below 128 index the table directly, all others use entry 128
  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
  #else
  // narrow-character build: c is converted to unsigned char before indexing
  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
  #endif
  // classification against chartype_table (chartype_t flags)
  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
  // classification against chartypex_table (chartypex_t flags)
  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
  1540. PUGI__FN bool is_little_endian()
  1541. {
  1542. unsigned int ui = 1;
  1543. return *reinterpret_cast<unsigned char*>(&ui) == 1;
  1544. }
  1545. PUGI__FN xml_encoding get_wchar_encoding()
  1546. {
  1547. PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
  1548. if (sizeof(wchar_t) == 2)
  1549. return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1550. else
  1551. return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1552. }
  1553. PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
  1554. {
  1555. #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
  1556. #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
  1557. // check if we have a non-empty XML declaration
  1558. if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
  1559. return false;
  1560. // scan XML declaration until the encoding field
  1561. for (size_t i = 6; i + 1 < size; ++i)
  1562. {
  1563. // declaration can not contain ? in quoted values
  1564. if (data[i] == '?')
  1565. return false;
  1566. if (data[i] == 'e' && data[i + 1] == 'n')
  1567. {
  1568. size_t offset = i;
  1569. // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
  1570. PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
  1571. PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
  1572. // S? = S?
  1573. PUGI__SCANCHARTYPE(ct_space);
  1574. PUGI__SCANCHAR('=');
  1575. PUGI__SCANCHARTYPE(ct_space);
  1576. // the only two valid delimiters are ' and "
  1577. uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
  1578. PUGI__SCANCHAR(delimiter);
  1579. size_t start = offset;
  1580. out_encoding = data + offset;
  1581. PUGI__SCANCHARTYPE(ct_symbol);
  1582. out_length = offset - start;
  1583. PUGI__SCANCHAR(delimiter);
  1584. return true;
  1585. }
  1586. }
  1587. return false;
  1588. #undef PUGI__SCANCHAR
  1589. #undef PUGI__SCANCHARTYPE
  1590. }
  1591. PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
  1592. {
  1593. // skip encoding autodetection if input buffer is too small
  1594. if (size < 4) return encoding_utf8;
  1595. uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
  1596. // look for BOM in first few bytes
  1597. if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
  1598. if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1599. if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
  1600. if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
  1601. if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
  1602. // look for <, <? or <?xm in various encodings
  1603. if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
  1604. if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1605. if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
  1606. if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
  1607. // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
  1608. if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
  1609. if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
  1610. // no known BOM detected; parse declaration
  1611. const uint8_t* enc = 0;
  1612. size_t enc_length = 0;
  1613. if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
  1614. {
  1615. // iso-8859-1 (case-insensitive)
  1616. if (enc_length == 10
  1617. && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
  1618. && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
  1619. && enc[8] == '-' && enc[9] == '1')
  1620. return encoding_latin1;
  1621. // latin1 (case-insensitive)
  1622. if (enc_length == 6
  1623. && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
  1624. && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
  1625. && enc[5] == '1')
  1626. return encoding_latin1;
  1627. }
  1628. return encoding_utf8;
  1629. }
  1630. PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
  1631. {
  1632. // replace wchar encoding with utf implementation
  1633. if (encoding == encoding_wchar) return get_wchar_encoding();
  1634. // replace utf16 encoding with utf16 with specific endianness
  1635. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1636. // replace utf32 encoding with utf32 with specific endianness
  1637. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1638. // only do autodetection if no explicit encoding is requested
  1639. if (encoding != encoding_auto) return encoding;
  1640. // try to guess encoding (based on XML specification, Appendix F.1)
  1641. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1642. return guess_buffer_encoding(data, size);
  1643. }
  1644. PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1645. {
  1646. size_t length = size / sizeof(char_t);
  1647. if (is_mutable)
  1648. {
  1649. out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
  1650. out_length = length;
  1651. }
  1652. else
  1653. {
  1654. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1655. if (!buffer) return false;
  1656. if (contents)
  1657. memcpy(buffer, contents, length * sizeof(char_t));
  1658. else
  1659. assert(length == 0);
  1660. buffer[length] = 0;
  1661. out_buffer = buffer;
  1662. out_length = length + 1;
  1663. }
  1664. return true;
  1665. }
  1666. #ifdef PUGIXML_WCHAR_MODE
  1667. PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
  1668. {
  1669. return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
  1670. (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
  1671. }
  1672. PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1673. {
  1674. const char_t* data = static_cast<const char_t*>(contents);
  1675. size_t length = size / sizeof(char_t);
  1676. if (is_mutable)
  1677. {
  1678. char_t* buffer = const_cast<char_t*>(data);
  1679. convert_wchar_endian_swap(buffer, data, length);
  1680. out_buffer = buffer;
  1681. out_length = length;
  1682. }
  1683. else
  1684. {
  1685. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1686. if (!buffer) return false;
  1687. convert_wchar_endian_swap(buffer, data, length);
  1688. buffer[length] = 0;
  1689. out_buffer = buffer;
  1690. out_length = length + 1;
  1691. }
  1692. return true;
  1693. }
  1694. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1695. {
  1696. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1697. size_t data_length = size / sizeof(typename D::type);
  1698. // first pass: get length in wchar_t units
  1699. size_t length = D::process(data, data_length, 0, wchar_counter());
  1700. // allocate buffer of suitable length
  1701. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1702. if (!buffer) return false;
  1703. // second pass: convert utf16 input to wchar_t
  1704. wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
  1705. wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
  1706. assert(oend == obegin + length);
  1707. *oend = 0;
  1708. out_buffer = buffer;
  1709. out_length = length + 1;
  1710. return true;
  1711. }
  1712. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1713. {
  1714. // get native encoding
  1715. xml_encoding wchar_encoding = get_wchar_encoding();
  1716. // fast path: no conversion required
  1717. if (encoding == wchar_encoding)
  1718. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1719. // only endian-swapping is required
  1720. if (need_endian_swap_utf(encoding, wchar_encoding))
  1721. return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
  1722. // source encoding is utf8
  1723. if (encoding == encoding_utf8)
  1724. return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
  1725. // source encoding is utf16
  1726. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1727. {
  1728. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1729. return (native_encoding == encoding) ?
  1730. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1731. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1732. }
  1733. // source encoding is utf32
  1734. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1735. {
  1736. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1737. return (native_encoding == encoding) ?
  1738. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1739. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1740. }
  1741. // source encoding is latin1
  1742. if (encoding == encoding_latin1)
  1743. return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
  1744. assert(false && "Invalid encoding"); // unreachable
  1745. return false;
  1746. }
  1747. #else
  1748. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1749. {
  1750. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1751. size_t data_length = size / sizeof(typename D::type);
  1752. // first pass: get length in utf8 units
  1753. size_t length = D::process(data, data_length, 0, utf8_counter());
  1754. // allocate buffer of suitable length
  1755. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1756. if (!buffer) return false;
  1757. // second pass: convert utf16 input to utf8
  1758. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1759. uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
  1760. assert(oend == obegin + length);
  1761. *oend = 0;
  1762. out_buffer = buffer;
  1763. out_length = length + 1;
  1764. return true;
  1765. }
  1766. PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
  1767. {
  1768. for (size_t i = 0; i < size; ++i)
  1769. if (data[i] > 127)
  1770. return i;
  1771. return size;
  1772. }
  1773. PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1774. {
  1775. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1776. size_t data_length = size;
  1777. // get size of prefix that does not need utf8 conversion
  1778. size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
  1779. assert(prefix_length <= data_length);
  1780. const uint8_t* postfix = data + prefix_length;
  1781. size_t postfix_length = data_length - prefix_length;
  1782. // if no conversion is needed, just return the original buffer
  1783. if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1784. // first pass: get length in utf8 units
  1785. size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
  1786. // allocate buffer of suitable length
  1787. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1788. if (!buffer) return false;
  1789. // second pass: convert latin1 input to utf8
  1790. memcpy(buffer, data, prefix_length);
  1791. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1792. uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
  1793. assert(oend == obegin + length);
  1794. *oend = 0;
  1795. out_buffer = buffer;
  1796. out_length = length + 1;
  1797. return true;
  1798. }
  1799. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1800. {
  1801. // fast path: no conversion required
  1802. if (encoding == encoding_utf8)
  1803. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1804. // source encoding is utf16
  1805. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1806. {
  1807. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1808. return (native_encoding == encoding) ?
  1809. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1810. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1811. }
  1812. // source encoding is utf32
  1813. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1814. {
  1815. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1816. return (native_encoding == encoding) ?
  1817. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1818. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1819. }
  1820. // source encoding is latin1
  1821. if (encoding == encoding_latin1)
  1822. return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
  1823. assert(false && "Invalid encoding"); // unreachable
  1824. return false;
  1825. }
  1826. #endif
  1827. PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
  1828. {
  1829. // get length in utf8 characters
  1830. return wchar_decoder::process(str, length, 0, utf8_counter());
  1831. }
  1832. PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
  1833. {
  1834. // convert to utf8
  1835. uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
  1836. uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
  1837. assert(begin + size == end);
  1838. (void)!end;
  1839. (void)!size;
  1840. }
  1841. #ifndef PUGIXML_NO_STL
  1842. PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
  1843. {
  1844. // first pass: get length in utf8 characters
  1845. size_t size = as_utf8_begin(str, length);
  1846. // allocate resulting string
  1847. std::string result;
  1848. result.resize(size);
  1849. // second pass: convert to utf8
  1850. if (size > 0) as_utf8_end(&result[0], size, str, length);
  1851. return result;
  1852. }
  1853. PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
  1854. {
  1855. const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
  1856. // first pass: get length in wchar_t units
  1857. size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
  1858. // allocate resulting string
  1859. std::basic_string<wchar_t> result;
  1860. result.resize(length);
  1861. // second pass: convert to wchar_t
  1862. if (length > 0)
  1863. {
  1864. wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
  1865. wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
  1866. assert(begin + length == end);
  1867. (void)!end;
  1868. }
  1869. return result;
  1870. }
  1871. #endif
  1872. template <typename Header>
  1873. inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
  1874. {
  1875. // never reuse shared memory
  1876. if (header & xml_memory_page_contents_shared_mask) return false;
  1877. size_t target_length = strlength(target);
  1878. // always reuse document buffer memory if possible
  1879. if ((header & header_mask) == 0) return target_length >= length;
  1880. // reuse heap memory if waste is not too great
  1881. const size_t reuse_threshold = 32;
  1882. return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
  1883. }
  1884. template <typename String, typename Header>
  1885. PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
  1886. {
  1887. if (source_length == 0)
  1888. {
  1889. // empty string and null pointer are equivalent, so just deallocate old memory
  1890. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1891. if (header & header_mask) alloc->deallocate_string(dest);
  1892. // mark the string as not allocated
  1893. dest = 0;
  1894. header &= ~header_mask;
  1895. return true;
  1896. }
  1897. else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
  1898. {
  1899. // we can reuse old buffer, so just copy the new data (including zero terminator)
  1900. memcpy(dest, source, source_length * sizeof(char_t));
  1901. dest[source_length] = 0;
  1902. return true;
  1903. }
  1904. else
  1905. {
  1906. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1907. if (!alloc->reserve()) return false;
  1908. // allocate new buffer
  1909. char_t* buf = alloc->allocate_string(source_length + 1);
  1910. if (!buf) return false;
  1911. // copy the string (including zero terminator)
  1912. memcpy(buf, source, source_length * sizeof(char_t));
  1913. buf[source_length] = 0;
  1914. // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
  1915. if (header & header_mask) alloc->deallocate_string(dest);
  1916. // the string is now allocated, so set the flag
  1917. dest = buf;
  1918. header |= header_mask;
  1919. return true;
  1920. }
  1921. }
  1922. struct gap
  1923. {
  1924. char_t* end;
  1925. size_t size;
  1926. gap(): end(0), size(0)
  1927. {
  1928. }
  1929. // Push new gap, move s count bytes further (skipping the gap).
  1930. // Collapse previous gap.
  1931. void push(char_t*& s, size_t count)
  1932. {
  1933. if (end) // there was a gap already; collapse it
  1934. {
  1935. // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
  1936. assert(s >= end);
  1937. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1938. }
  1939. s += count; // end of current gap
  1940. // "merge" two gaps
  1941. end = s;
  1942. size += count;
  1943. }
  1944. // Collapse all gaps, return past-the-end pointer
  1945. char_t* flush(char_t* s)
  1946. {
  1947. if (end)
  1948. {
  1949. // Move [old_gap_end, current_pos) to [old_gap_start, ...)
  1950. assert(s >= end);
  1951. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1952. return s - size;
  1953. }
  1954. else return s;
  1955. }
  1956. };
  1957. PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
  1958. {
  1959. char_t* stre = s + 1;
  1960. switch (*stre)
  1961. {
  1962. case '#': // &#...
  1963. {
  1964. unsigned int ucsc = 0;
  1965. if (stre[1] == 'x') // &#x... (hex code)
  1966. {
  1967. stre += 2;
  1968. char_t ch = *stre;
  1969. if (ch == ';') return stre;
  1970. for (;;)
  1971. {
  1972. if (static_cast<unsigned int>(ch - '0') <= 9)
  1973. ucsc = 16 * ucsc + (ch - '0');
  1974. else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
  1975. ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
  1976. else if (ch == ';')
  1977. break;
  1978. else // cancel
  1979. return stre;
  1980. ch = *++stre;
  1981. }
  1982. ++stre;
  1983. }
  1984. else // &#... (dec code)
  1985. {
  1986. char_t ch = *++stre;
  1987. if (ch == ';') return stre;
  1988. for (;;)
  1989. {
  1990. if (static_cast<unsigned int>(ch - '0') <= 9)
  1991. ucsc = 10 * ucsc + (ch - '0');
  1992. else if (ch == ';')
  1993. break;
  1994. else // cancel
  1995. return stre;
  1996. ch = *++stre;
  1997. }
  1998. ++stre;
  1999. }
  2000. #ifdef PUGIXML_WCHAR_MODE
  2001. s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
  2002. #else
  2003. s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
  2004. #endif
  2005. g.push(s, stre - s);
  2006. return stre;
  2007. }
  2008. case 'a': // &a
  2009. {
  2010. ++stre;
  2011. if (*stre == 'm') // &am
  2012. {
  2013. if (*++stre == 'p' && *++stre == ';') // &amp;
  2014. {
  2015. *s++ = '&';
  2016. ++stre;
  2017. g.push(s, stre - s);
  2018. return stre;
  2019. }
  2020. }
  2021. else if (*stre == 'p') // &ap
  2022. {
  2023. if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
  2024. {
  2025. *s++ = '\'';
  2026. ++stre;
  2027. g.push(s, stre - s);
  2028. return stre;
  2029. }
  2030. }
  2031. break;
  2032. }
  2033. case 'g': // &g
  2034. {
  2035. if (*++stre == 't' && *++stre == ';') // &gt;
  2036. {
  2037. *s++ = '>';
  2038. ++stre;
  2039. g.push(s, stre - s);
  2040. return stre;
  2041. }
  2042. break;
  2043. }
  2044. case 'l': // &l
  2045. {
  2046. if (*++stre == 't' && *++stre == ';') // &lt;
  2047. {
  2048. *s++ = '<';
  2049. ++stre;
  2050. g.push(s, stre - s);
  2051. return stre;
  2052. }
  2053. break;
  2054. }
  2055. case 'q': // &q
  2056. {
  2057. if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
  2058. {
  2059. *s++ = '"';
  2060. ++stre;
  2061. g.push(s, stre - s);
  2062. return stre;
  2063. }
  2064. break;
  2065. }
  2066. default:
  2067. break;
  2068. }
  2069. return stre;
  2070. }
  // Parser utilities
  //
  // These macros expand in place and refer to variables of the function they are used in
  // (s, ch, endch, optmsk, cursor, alloc, error_offset, error_status).

  // c is the character e, or c is a null terminator while endch is e
  #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))

  // advance s over ct_space characters
  #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }

  // the bits of OPT that are set in optmsk
  #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )

  // append a node of TYPE under cursor and descend into it; a null result is reported as status_out_of_memory
  #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }

  // move cursor back to its parent
  #define PUGI__POPNODE() { cursor = cursor->parent; }

  // advance s until X holds or the null terminator is reached
  #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }

  // advance s while X holds
  #define PUGI__SCANWHILE(X) { while (X) ++s; }

  // same as PUGI__SCANWHILE, four characters per iteration; X must test the character through 'ss'
  #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { \
      char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } \
      ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } \
      ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } \
      ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } \
      s += 4; } }

  // remember the character at s in ch, overwrite it with a terminator and step past it
  #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }

  // record the error position and status, then return null from the enclosing function
  #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)

  // raise err at m if s has reached the null terminator
  #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
  2083. PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
  2084. {
  2085. gap g;
  2086. while (true)
  2087. {
  2088. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
  2089. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2090. {
  2091. *s++ = '\n'; // replace first one with 0x0a
  2092. if (*s == '\n') g.push(s, 1);
  2093. }
  2094. else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
  2095. {
  2096. *g.flush(s) = 0;
  2097. return s + (s[2] == '>' ? 3 : 2);
  2098. }
  2099. else if (*s == 0)
  2100. {
  2101. return 0;
  2102. }
  2103. else ++s;
  2104. }
  2105. }
  2106. PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
  2107. {
  2108. gap g;
  2109. while (true)
  2110. {
  2111. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
  2112. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2113. {
  2114. *s++ = '\n'; // replace first one with 0x0a
  2115. if (*s == '\n') g.push(s, 1);
  2116. }
  2117. else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
  2118. {
  2119. *g.flush(s) = 0;
  2120. return s + 1;
  2121. }
  2122. else if (*s == 0)
  2123. {
  2124. return 0;
  2125. }
  2126. else ++s;
  2127. }
  2128. }
  2129. typedef char_t* (*strconv_pcdata_t)(char_t*);
  2130. template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
  2131. {
  2132. static char_t* parse(char_t* s)
  2133. {
  2134. gap g;
  2135. char_t* begin = s;
  2136. while (true)
  2137. {
  2138. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
  2139. if (*s == '<') // PCDATA ends here
  2140. {
  2141. char_t* end = g.flush(s);
  2142. if (opt_trim::value)
  2143. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2144. --end;
  2145. *end = 0;
  2146. return s + 1;
  2147. }
  2148. else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2149. {
  2150. *s++ = '\n'; // replace first one with 0x0a
  2151. if (*s == '\n') g.push(s, 1);
  2152. }
  2153. else if (opt_escape::value && *s == '&')
  2154. {
  2155. s = strconv_escape(s, g);
  2156. }
  2157. else if (*s == 0)
  2158. {
  2159. char_t* end = g.flush(s);
  2160. if (opt_trim::value)
  2161. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2162. --end;
  2163. *end = 0;
  2164. return s;
  2165. }
  2166. else ++s;
  2167. }
  2168. }
  2169. };
  2170. PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
  2171. {
  2172. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
  2173. switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
  2174. {
  2175. case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
  2176. case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
  2177. case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
  2178. case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
  2179. case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
  2180. case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
  2181. case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
  2182. case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
  2183. default: assert(false); return 0; // unreachable
  2184. }
  2185. }
  2186. typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
  2187. template <typename opt_escape> struct strconv_attribute_impl
  2188. {
  2189. static char_t* parse_wnorm(char_t* s, char_t end_quote)
  2190. {
  2191. gap g;
  2192. // trim leading whitespaces
  2193. if (PUGI__IS_CHARTYPE(*s, ct_space))
  2194. {
  2195. char_t* str = s;
  2196. do ++str;
  2197. while (PUGI__IS_CHARTYPE(*str, ct_space));
  2198. g.push(s, str - s);
  2199. }
  2200. while (true)
  2201. {
  2202. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
  2203. if (*s == end_quote)
  2204. {
  2205. char_t* str = g.flush(s);
  2206. do *str-- = 0;
  2207. while (PUGI__IS_CHARTYPE(*str, ct_space));
  2208. return s + 1;
  2209. }
  2210. else if (PUGI__IS_CHARTYPE(*s, ct_space))
  2211. {
  2212. *s++ = ' ';
  2213. if (PUGI__IS_CHARTYPE(*s, ct_space))
  2214. {
  2215. char_t* str = s + 1;
  2216. while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
  2217. g.push(s, str - s);
  2218. }
  2219. }
  2220. else if (opt_escape::value && *s == '&')
  2221. {
  2222. s = strconv_escape(s, g);
  2223. }
  2224. else if (!*s)
  2225. {
  2226. return 0;
  2227. }
  2228. else ++s;
  2229. }
  2230. }
  2231. static char_t* parse_wconv(char_t* s, char_t end_quote)
  2232. {
  2233. gap g;
  2234. while (true)
  2235. {
  2236. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
  2237. if (*s == end_quote)
  2238. {
  2239. *g.flush(s) = 0;
  2240. return s + 1;
  2241. }
  2242. else if (PUGI__IS_CHARTYPE(*s, ct_space))
  2243. {
  2244. if (*s == '\r')
  2245. {
  2246. *s++ = ' ';
  2247. if (*s == '\n') g.push(s, 1);
  2248. }
  2249. else *s++ = ' ';
  2250. }
  2251. else if (opt_escape::value && *s == '&')
  2252. {
  2253. s = strconv_escape(s, g);
  2254. }
  2255. else if (!*s)
  2256. {
  2257. return 0;
  2258. }
  2259. else ++s;
  2260. }
  2261. }
  2262. static char_t* parse_eol(char_t* s, char_t end_quote)
  2263. {
  2264. gap g;
  2265. while (true)
  2266. {
  2267. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
  2268. if (*s == end_quote)
  2269. {
  2270. *g.flush(s) = 0;
  2271. return s + 1;
  2272. }
  2273. else if (*s == '\r')
  2274. {
  2275. *s++ = '\n';
  2276. if (*s == '\n') g.push(s, 1);
  2277. }
  2278. else if (opt_escape::value && *s == '&')
  2279. {
  2280. s = strconv_escape(s, g);
  2281. }
  2282. else if (!*s)
  2283. {
  2284. return 0;
  2285. }
  2286. else ++s;
  2287. }
  2288. }
  2289. static char_t* parse_simple(char_t* s, char_t end_quote)
  2290. {
  2291. gap g;
  2292. while (true)
  2293. {
  2294. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
  2295. if (*s == end_quote)
  2296. {
  2297. *g.flush(s) = 0;
  2298. return s + 1;
  2299. }
  2300. else if (opt_escape::value && *s == '&')
  2301. {
  2302. s = strconv_escape(s, g);
  2303. }
  2304. else if (!*s)
  2305. {
  2306. return 0;
  2307. }
  2308. else ++s;
  2309. }
  2310. }
  2311. };
  2312. PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
  2313. {
  2314. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
  2315. switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
  2316. {
  2317. case 0: return strconv_attribute_impl<opt_false>::parse_simple;
  2318. case 1: return strconv_attribute_impl<opt_true>::parse_simple;
  2319. case 2: return strconv_attribute_impl<opt_false>::parse_eol;
  2320. case 3: return strconv_attribute_impl<opt_true>::parse_eol;
  2321. case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
  2322. case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
  2323. case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
  2324. case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
  2325. case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2326. case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2327. case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2328. case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2329. case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2330. case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2331. case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2332. case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2333. default: assert(false); return 0; // unreachable
  2334. }
  2335. }
  2336. inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
  2337. {
  2338. xml_parse_result result;
  2339. result.status = status;
  2340. result.offset = offset;
  2341. return result;
  2342. }
  2343. struct xml_parser
  2344. {
  2345. xml_allocator* alloc;
  2346. char_t* error_offset;
  2347. xml_parse_status error_status;
  2348. xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
  2349. {
  2350. }
  // DOCTYPE consists of nested sections of the following possible types:
  //   1. <!-- ... -->, <? ... ?>, "...", '...'
  //   2. <![...]]>
  //   3. <!...>
  // Sections of the first group cannot contain nested sections.
  // Sections of the second group can contain nested sections of the same type.
  // Sections of the third group can contain sections of all other groups.
  2358. char_t* parse_doctype_primitive(char_t* s)
  2359. {
  2360. if (*s == '"' || *s == '\'')
  2361. {
  2362. // quoted string
  2363. char_t ch = *s++;
  2364. PUGI__SCANFOR(*s == ch);
  2365. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2366. s++;
  2367. }
  2368. else if (s[0] == '<' && s[1] == '?')
  2369. {
  2370. // <? ... ?>
  2371. s += 2;
  2372. PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
  2373. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2374. s += 2;
  2375. }
  2376. else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
  2377. {
  2378. s += 4;
  2379. PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
  2380. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2381. s += 3;
  2382. }
  2383. else PUGI__THROW_ERROR(status_bad_doctype, s);
  2384. return s;
  2385. }
  2386. char_t* parse_doctype_ignore(char_t* s)
  2387. {
  2388. size_t depth = 0;
  2389. assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
  2390. s += 3;
  2391. while (*s)
  2392. {
  2393. if (s[0] == '<' && s[1] == '!' && s[2] == '[')
  2394. {
  2395. // nested ignore section
  2396. s += 3;
  2397. depth++;
  2398. }
  2399. else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
  2400. {
  2401. // ignore section end
  2402. s += 3;
  2403. if (depth == 0)
  2404. return s;
  2405. depth--;
  2406. }
  2407. else s++;
  2408. }
  2409. PUGI__THROW_ERROR(status_bad_doctype, s);
  2410. }
  2411. char_t* parse_doctype_group(char_t* s, char_t endch)
  2412. {
  2413. size_t depth = 0;
  2414. assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
  2415. s += 2;
  2416. while (*s)
  2417. {
  2418. if (s[0] == '<' && s[1] == '!' && s[2] != '-')
  2419. {
  2420. if (s[2] == '[')
  2421. {
  2422. // ignore
  2423. s = parse_doctype_ignore(s);
  2424. if (!s) return s;
  2425. }
  2426. else
  2427. {
  2428. // some control group
  2429. s += 2;
  2430. depth++;
  2431. }
  2432. }
  2433. else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
  2434. {
  2435. // unknown tag (forbidden), or some primitive group
  2436. s = parse_doctype_primitive(s);
  2437. if (!s) return s;
  2438. }
  2439. else if (*s == '>')
  2440. {
  2441. if (depth == 0)
  2442. return s;
  2443. depth--;
  2444. s++;
  2445. }
  2446. else s++;
  2447. }
  2448. if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
  2449. return s;
  2450. }
  2451. char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
  2452. {
  2453. // parse node contents, starting with exclamation mark
  2454. ++s;
  2455. if (*s == '-') // '<!-...'
  2456. {
  2457. ++s;
  2458. if (*s == '-') // '<!--...'
  2459. {
  2460. ++s;
  2461. if (PUGI__OPTSET(parse_comments))
  2462. {
  2463. PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
  2464. cursor->value = s; // Save the offset.
  2465. }
  2466. if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
  2467. {
  2468. s = strconv_comment(s, endch);
  2469. if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
  2470. }
  2471. else
  2472. {
  2473. // Scan for terminating '-->'.
  2474. PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
  2475. PUGI__CHECK_ERROR(status_bad_comment, s);
  2476. if (PUGI__OPTSET(parse_comments))
  2477. *s = 0; // Zero-terminate this segment at the first terminating '-'.
  2478. s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
  2479. }
  2480. }
  2481. else PUGI__THROW_ERROR(status_bad_comment, s);
  2482. }
  2483. else if (*s == '[')
  2484. {
  2485. // '<![CDATA[...'
  2486. if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
  2487. {
  2488. ++s;
  2489. if (PUGI__OPTSET(parse_cdata))
  2490. {
  2491. PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
  2492. cursor->value = s; // Save the offset.
  2493. if (PUGI__OPTSET(parse_eol))
  2494. {
  2495. s = strconv_cdata(s, endch);
  2496. if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
  2497. }
  2498. else
  2499. {
  2500. // Scan for terminating ']]>'.
  2501. PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
  2502. PUGI__CHECK_ERROR(status_bad_cdata, s);
  2503. *s++ = 0; // Zero-terminate this segment.
  2504. }
  2505. }
  2506. else // Flagged for discard, but we still have to scan for the terminator.
  2507. {
  2508. // Scan for terminating ']]>'.
  2509. PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
  2510. PUGI__CHECK_ERROR(status_bad_cdata, s);
  2511. ++s;
  2512. }
  2513. s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
  2514. }
  2515. else PUGI__THROW_ERROR(status_bad_cdata, s);
  2516. }
  2517. else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
  2518. {
  2519. s -= 2;
  2520. if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
  2521. char_t* mark = s + 9;
  2522. s = parse_doctype_group(s, endch);
  2523. if (!s) return s;
  2524. assert((*s == 0 && endch == '>') || *s == '>');
  2525. if (*s) *s++ = 0;
  2526. if (PUGI__OPTSET(parse_doctype))
  2527. {
  2528. while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
  2529. PUGI__PUSHNODE(node_doctype);
  2530. cursor->value = mark;
  2531. }
  2532. }
  2533. else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
  2534. else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
  2535. else PUGI__THROW_ERROR(status_unrecognized_tag, s);
  2536. return s;
  2537. }
  2538. char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
  2539. {
  2540. // load into registers
  2541. xml_node_struct* cursor = ref_cursor;
  2542. char_t ch = 0;
  2543. // parse node contents, starting with question mark
  2544. ++s;
  2545. // read PI target
  2546. char_t* target = s;
  2547. if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
  2548. PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
  2549. PUGI__CHECK_ERROR(status_bad_pi, s);
  2550. // determine node type; stricmp / strcasecmp is not portable
  2551. bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
  2552. if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
  2553. {
  2554. if (declaration)
  2555. {
  2556. // disallow non top-level declarations
  2557. if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
  2558. PUGI__PUSHNODE(node_declaration);
  2559. }
  2560. else
  2561. {
  2562. PUGI__PUSHNODE(node_pi);
  2563. }
  2564. cursor->name = target;
  2565. PUGI__ENDSEG();
  2566. // parse value/attributes
  2567. if (ch == '?')
  2568. {
  2569. // empty node
  2570. if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
  2571. s += (*s == '>');
  2572. PUGI__POPNODE();
  2573. }
  2574. else if (PUGI__IS_CHARTYPE(ch, ct_space))
  2575. {
  2576. PUGI__SKIPWS();
  2577. // scan for tag end
  2578. char_t* value = s;
  2579. PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
  2580. PUGI__CHECK_ERROR(status_bad_pi, s);
  2581. if (declaration)
  2582. {
  2583. // replace ending ? with / so that 'element' terminates properly
  2584. *s = '/';
  2585. // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
  2586. s = value;
  2587. }
  2588. else
  2589. {
  2590. // store value and step over >
  2591. cursor->value = value;
  2592. PUGI__POPNODE();
  2593. PUGI__ENDSEG();
  2594. s += (*s == '>');
  2595. }
  2596. }
  2597. else PUGI__THROW_ERROR(status_bad_pi, s);
  2598. }
  2599. else
  2600. {
  2601. // scan for tag end
  2602. PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
  2603. PUGI__CHECK_ERROR(status_bad_pi, s);
  2604. s += (s[1] == '>' ? 2 : 1);
  2605. }
  2606. // store from registers
  2607. ref_cursor = cursor;
  2608. return s;
  2609. }
  2610. char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
  2611. {
  2612. strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
  2613. strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
  2614. char_t ch = 0;
  2615. xml_node_struct* cursor = root;
  2616. char_t* mark = s;
  2617. while (*s != 0)
  2618. {
  2619. if (*s == '<')
  2620. {
  2621. ++s;
  2622. LOC_TAG:
  2623. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
  2624. {
  2625. PUGI__PUSHNODE(node_element); // Append a new node to the tree.
  2626. cursor->name = s;
  2627. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2628. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
  2629. if (ch == '>')
  2630. {
  2631. // end of tag
  2632. }
  2633. else if (PUGI__IS_CHARTYPE(ch, ct_space))
  2634. {
  2635. LOC_ATTRIBUTES:
  2636. while (true)
  2637. {
  2638. PUGI__SKIPWS(); // Eat any whitespace.
  2639. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
  2640. {
  2641. xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
  2642. if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
  2643. a->name = s; // Save the offset.
  2644. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2645. PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
  2646. if (PUGI__IS_CHARTYPE(ch, ct_space))
  2647. {
  2648. PUGI__SKIPWS(); // Eat any whitespace.
  2649. ch = *s;
  2650. ++s;
  2651. }
  2652. if (ch == '=') // '<... #=...'
  2653. {
  2654. PUGI__SKIPWS(); // Eat any whitespace.
  2655. if (*s == '"' || *s == '\'') // '<... #="...'
  2656. {
  2657. ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
  2658. ++s; // Step over the quote.
  2659. a->value = s; // Save the offset.
  2660. s = strconv_attribute(s, ch);
  2661. if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
  2662. // After this line the loop continues from the start;
  2663. // Whitespaces, / and > are ok, symbols and EOF are wrong,
  2664. // everything else will be detected
  2665. if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
  2666. }
  2667. else PUGI__THROW_ERROR(status_bad_attribute, s);
  2668. }
  2669. else PUGI__THROW_ERROR(status_bad_attribute, s);
  2670. }
  2671. else if (*s == '/')
  2672. {
  2673. ++s;
  2674. if (*s == '>')
  2675. {
  2676. PUGI__POPNODE();
  2677. s++;
  2678. break;
  2679. }
  2680. else if (*s == 0 && endch == '>')
  2681. {
  2682. PUGI__POPNODE();
  2683. break;
  2684. }
  2685. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2686. }
  2687. else if (*s == '>')
  2688. {
  2689. ++s;
  2690. break;
  2691. }
  2692. else if (*s == 0 && endch == '>')
  2693. {
  2694. break;
  2695. }
  2696. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2697. }
  2698. // !!!
  2699. }
  2700. else if (ch == '/') // '<#.../'
  2701. {
  2702. if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
  2703. PUGI__POPNODE(); // Pop.
  2704. s += (*s == '>');
  2705. }
  2706. else if (ch == 0)
  2707. {
  2708. // we stepped over null terminator, backtrack & handle closing tag
  2709. --s;
  2710. if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
  2711. }
  2712. else PUGI__THROW_ERROR(status_bad_start_element, s);
  2713. }
  2714. else if (*s == '/')
  2715. {
  2716. ++s;
  2717. mark = s;
  2718. char_t* name = cursor->name;
  2719. if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2720. while (PUGI__IS_CHARTYPE(*s, ct_symbol))
  2721. {
  2722. if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2723. }
  2724. if (*name)
  2725. {
  2726. if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
  2727. else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
  2728. }
  2729. PUGI__POPNODE(); // Pop.
  2730. PUGI__SKIPWS();
  2731. if (*s == 0)
  2732. {
  2733. if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
  2734. }
  2735. else
  2736. {
  2737. if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
  2738. ++s;
  2739. }
  2740. }
  2741. else if (*s == '?') // '<?...'
  2742. {
  2743. s = parse_question(s, cursor, optmsk, endch);
  2744. if (!s) return s;
  2745. assert(cursor);
  2746. if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
  2747. }
  2748. else if (*s == '!') // '<!...'
  2749. {
  2750. s = parse_exclamation(s, cursor, optmsk, endch);
  2751. if (!s) return s;
  2752. }
  2753. else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
  2754. else PUGI__THROW_ERROR(status_unrecognized_tag, s);
  2755. }
  2756. else
  2757. {
  2758. mark = s; // Save this offset while searching for a terminator.
  2759. PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
  2760. if (*s == '<' || !*s)
  2761. {
  2762. // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
  2763. assert(mark != s);
  2764. if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
  2765. {
  2766. continue;
  2767. }
  2768. else if (PUGI__OPTSET(parse_ws_pcdata_single))
  2769. {
  2770. if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
  2771. }
  2772. }
  2773. if (!PUGI__OPTSET(parse_trim_pcdata))
  2774. s = mark;
  2775. if (cursor->parent || PUGI__OPTSET(parse_fragment))
  2776. {
  2777. if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
  2778. {
  2779. cursor->value = s; // Save the offset.
  2780. }
  2781. else
  2782. {
  2783. PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
  2784. cursor->value = s; // Save the offset.
  2785. PUGI__POPNODE(); // Pop since this is a standalone.
  2786. }
  2787. s = strconv_pcdata(s);
  2788. if (!*s) break;
  2789. }
  2790. else
  2791. {
  2792. PUGI__SCANFOR(*s == '<'); // '...<'
  2793. if (!*s) break;
  2794. ++s;
  2795. }
  2796. // We're after '<'
  2797. goto LOC_TAG;
  2798. }
  2799. }
  2800. // check that last tag is closed
  2801. if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
  2802. return s;
  2803. }
  2804. #ifdef PUGIXML_WCHAR_MODE
  2805. static char_t* parse_skip_bom(char_t* s)
  2806. {
  2807. unsigned int bom = 0xfeff;
  2808. return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
  2809. }
  2810. #else
  2811. static char_t* parse_skip_bom(char_t* s)
  2812. {
  2813. return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
  2814. }
  2815. #endif
  2816. static bool has_element_node_siblings(xml_node_struct* node)
  2817. {
  2818. while (node)
  2819. {
  2820. if (PUGI__NODETYPE(node) == node_element) return true;
  2821. node = node->next_sibling;
  2822. }
  2823. return false;
  2824. }
  2825. static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
  2826. {
  2827. // early-out for empty documents
  2828. if (length == 0)
  2829. return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
  2830. // get last child of the root before parsing
  2831. xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
  2832. // create parser on stack
  2833. xml_parser parser(static_cast<xml_allocator*>(xmldoc));
  2834. // save last character and make buffer zero-terminated (speeds up parsing)
  2835. char_t endch = buffer[length - 1];
  2836. buffer[length - 1] = 0;
  2837. // skip BOM to make sure it does not end up as part of parse output
  2838. char_t* buffer_data = parse_skip_bom(buffer);
  2839. // perform actual parsing
  2840. parser.parse_tree(buffer_data, root, optmsk, endch);
  2841. xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
  2842. assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
  2843. if (result)
  2844. {
  2845. // since we removed last character, we have to handle the only possible false positive (stray <)
  2846. if (endch == '<')
  2847. return make_parse_result(status_unrecognized_tag, length - 1);
  2848. // check if there are any element nodes parsed
  2849. xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
  2850. if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
  2851. return make_parse_result(status_no_document_element, length - 1);
  2852. }
  2853. else
  2854. {
  2855. // roll back offset if it occurs on a null terminator in the source buffer
  2856. if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
  2857. result.offset--;
  2858. }
  2859. return result;
  2860. }
  2861. };
  2862. // Output facilities
  2863. PUGI__FN xml_encoding get_write_native_encoding()
  2864. {
  2865. #ifdef PUGIXML_WCHAR_MODE
  2866. return get_wchar_encoding();
  2867. #else
  2868. return encoding_utf8;
  2869. #endif
  2870. }
  2871. PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
  2872. {
  2873. // replace wchar encoding with utf implementation
  2874. if (encoding == encoding_wchar) return get_wchar_encoding();
  2875. // replace utf16 encoding with utf16 with specific endianness
  2876. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2877. // replace utf32 encoding with utf32 with specific endianness
  2878. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2879. // only do autodetection if no explicit encoding is requested
  2880. if (encoding != encoding_auto) return encoding;
  2881. // assume utf8 encoding
  2882. return encoding_utf8;
  2883. }
  2884. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
  2885. {
  2886. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2887. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2888. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2889. }
  2890. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
  2891. {
  2892. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2893. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2894. if (opt_swap)
  2895. {
  2896. for (typename T::value_type i = dest; i != end; ++i)
  2897. *i = endian_swap(*i);
  2898. }
  2899. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2900. }
  2901. #ifdef PUGIXML_WCHAR_MODE
  2902. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2903. {
  2904. if (length < 1) return 0;
  2905. // discard last character if it's the lead of a surrogate pair
  2906. return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
  2907. }
  2908. PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2909. {
  2910. // only endian-swapping is required
  2911. if (need_endian_swap_utf(encoding, get_wchar_encoding()))
  2912. {
  2913. convert_wchar_endian_swap(r_char, data, length);
  2914. return length * sizeof(char_t);
  2915. }
  2916. // convert to utf8
  2917. if (encoding == encoding_utf8)
  2918. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
  2919. // convert to utf16
  2920. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2921. {
  2922. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2923. return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
  2924. }
  2925. // convert to utf32
  2926. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2927. {
  2928. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2929. return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
  2930. }
  2931. // convert to latin1
  2932. if (encoding == encoding_latin1)
  2933. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
  2934. assert(false && "Invalid encoding"); // unreachable
  2935. return 0;
  2936. }
  2937. #else
  2938. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2939. {
  2940. if (length < 5) return 0;
  2941. for (size_t i = 1; i <= 4; ++i)
  2942. {
  2943. uint8_t ch = static_cast<uint8_t>(data[length - i]);
  2944. // either a standalone character or a leading one
  2945. if ((ch & 0xc0) != 0x80) return length - i;
  2946. }
  2947. // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
  2948. return length;
  2949. }
  2950. PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2951. {
  2952. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2953. {
  2954. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2955. return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
  2956. }
  2957. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2958. {
  2959. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2960. return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
  2961. }
  2962. if (encoding == encoding_latin1)
  2963. return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
  2964. assert(false && "Invalid encoding"); // unreachable
  2965. return 0;
  2966. }
  2967. #endif
  2968. class xml_buffered_writer
  2969. {
  2970. xml_buffered_writer(const xml_buffered_writer&);
  2971. xml_buffered_writer& operator=(const xml_buffered_writer&);
  2972. public:
  2973. xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
  2974. {
  2975. PUGI__STATIC_ASSERT(bufcapacity >= 8);
  2976. }
  2977. size_t flush()
  2978. {
  2979. flush(buffer, bufsize);
  2980. bufsize = 0;
  2981. return 0;
  2982. }
  2983. void flush(const char_t* data, size_t size)
  2984. {
  2985. if (size == 0) return;
  2986. // fast path, just write data
  2987. if (encoding == get_write_native_encoding())
  2988. writer.write(data, size * sizeof(char_t));
  2989. else
  2990. {
  2991. // convert chunk
  2992. size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
  2993. assert(result <= sizeof(scratch));
  2994. // write data
  2995. writer.write(scratch.data_u8, result);
  2996. }
  2997. }
  2998. void write_direct(const char_t* data, size_t length)
  2999. {
  3000. // flush the remaining buffer contents
  3001. flush();
  3002. // handle large chunks
  3003. if (length > bufcapacity)
  3004. {
  3005. if (encoding == get_write_native_encoding())
  3006. {
  3007. // fast path, can just write data chunk
  3008. writer.write(data, length * sizeof(char_t));
  3009. return;
  3010. }
  3011. // need to convert in suitable chunks
  3012. while (length > bufcapacity)
  3013. {
  3014. // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
  3015. // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
  3016. size_t chunk_size = get_valid_length(data, bufcapacity);
  3017. assert(chunk_size);
  3018. // convert chunk and write
  3019. flush(data, chunk_size);
  3020. // iterate
  3021. data += chunk_size;
  3022. length -= chunk_size;
  3023. }
  3024. // small tail is copied below
  3025. bufsize = 0;
  3026. }
  3027. memcpy(buffer + bufsize, data, length * sizeof(char_t));
  3028. bufsize += length;
  3029. }
  3030. void write_buffer(const char_t* data, size_t length)
  3031. {
  3032. size_t offset = bufsize;
  3033. if (offset + length <= bufcapacity)
  3034. {
  3035. memcpy(buffer + offset, data, length * sizeof(char_t));
  3036. bufsize = offset + length;
  3037. }
  3038. else
  3039. {
  3040. write_direct(data, length);
  3041. }
  3042. }
  3043. void write_string(const char_t* data)
  3044. {
  3045. // write the part of the string that fits in the buffer
  3046. size_t offset = bufsize;
  3047. while (*data && offset < bufcapacity)
  3048. buffer[offset++] = *data++;
  3049. // write the rest
  3050. if (offset < bufcapacity)
  3051. {
  3052. bufsize = offset;
  3053. }
  3054. else
  3055. {
  3056. // backtrack a bit if we have split the codepoint
  3057. size_t length = offset - bufsize;
  3058. size_t extra = length - get_valid_length(data - length, length);
  3059. bufsize = offset - extra;
  3060. write_direct(data - extra, strlength(data) + extra);
  3061. }
  3062. }
  3063. void write(char_t d0)
  3064. {
  3065. size_t offset = bufsize;
  3066. if (offset > bufcapacity - 1) offset = flush();
  3067. buffer[offset + 0] = d0;
  3068. bufsize = offset + 1;
  3069. }
  3070. void write(char_t d0, char_t d1)
  3071. {
  3072. size_t offset = bufsize;
  3073. if (offset > bufcapacity - 2) offset = flush();
  3074. buffer[offset + 0] = d0;
  3075. buffer[offset + 1] = d1;
  3076. bufsize = offset + 2;
  3077. }
  3078. void write(char_t d0, char_t d1, char_t d2)
  3079. {
  3080. size_t offset = bufsize;
  3081. if (offset > bufcapacity - 3) offset = flush();
  3082. buffer[offset + 0] = d0;
  3083. buffer[offset + 1] = d1;
  3084. buffer[offset + 2] = d2;
  3085. bufsize = offset + 3;
  3086. }
  3087. void write(char_t d0, char_t d1, char_t d2, char_t d3)
  3088. {
  3089. size_t offset = bufsize;
  3090. if (offset > bufcapacity - 4) offset = flush();
  3091. buffer[offset + 0] = d0;
  3092. buffer[offset + 1] = d1;
  3093. buffer[offset + 2] = d2;
  3094. buffer[offset + 3] = d3;
  3095. bufsize = offset + 4;
  3096. }
  3097. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
  3098. {
  3099. size_t offset = bufsize;
  3100. if (offset > bufcapacity - 5) offset = flush();
  3101. buffer[offset + 0] = d0;
  3102. buffer[offset + 1] = d1;
  3103. buffer[offset + 2] = d2;
  3104. buffer[offset + 3] = d3;
  3105. buffer[offset + 4] = d4;
  3106. bufsize = offset + 5;
  3107. }
  3108. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
  3109. {
  3110. size_t offset = bufsize;
  3111. if (offset > bufcapacity - 6) offset = flush();
  3112. buffer[offset + 0] = d0;
  3113. buffer[offset + 1] = d1;
  3114. buffer[offset + 2] = d2;
  3115. buffer[offset + 3] = d3;
  3116. buffer[offset + 4] = d4;
  3117. buffer[offset + 5] = d5;
  3118. bufsize = offset + 6;
  3119. }
  3120. // utf8 maximum expansion: x4 (-> utf32)
  3121. // utf16 maximum expansion: x2 (-> utf32)
  3122. // utf32 maximum expansion: x1
  3123. enum
  3124. {
  3125. bufcapacitybytes =
  3126. #ifdef PUGIXML_MEMORY_OUTPUT_STACK
  3127. PUGIXML_MEMORY_OUTPUT_STACK
  3128. #else
  3129. 10240
  3130. #endif
  3131. ,
  3132. bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
  3133. };
  3134. char_t buffer[bufcapacity];
  3135. union
  3136. {
  3137. uint8_t data_u8[4 * bufcapacity];
  3138. uint16_t data_u16[2 * bufcapacity];
  3139. uint32_t data_u32[bufcapacity];
  3140. char_t data_char[bufcapacity];
  3141. } scratch;
  3142. xml_writer& writer;
  3143. size_t bufsize;
  3144. xml_encoding encoding;
  3145. };
  3146. PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3147. {
  3148. while (*s)
  3149. {
  3150. const char_t* prev = s;
  3151. // While *s is a usual symbol
  3152. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
  3153. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3154. switch (*s)
  3155. {
  3156. case 0: break;
  3157. case '&':
  3158. writer.write('&', 'a', 'm', 'p', ';');
  3159. ++s;
  3160. break;
  3161. case '<':
  3162. writer.write('&', 'l', 't', ';');
  3163. ++s;
  3164. break;
  3165. case '>':
  3166. writer.write('&', 'g', 't', ';');
  3167. ++s;
  3168. break;
  3169. case '"':
  3170. if (flags & format_attribute_single_quote)
  3171. writer.write('"');
  3172. else
  3173. writer.write('&', 'q', 'u', 'o', 't', ';');
  3174. ++s;
  3175. break;
  3176. case '\'':
  3177. if (flags & format_attribute_single_quote)
  3178. writer.write('&', 'a', 'p', 'o', 's', ';');
  3179. else
  3180. writer.write('\'');
  3181. ++s;
  3182. break;
  3183. default: // s is not a usual symbol
  3184. {
  3185. unsigned int ch = static_cast<unsigned int>(*s++);
  3186. assert(ch < 32);
  3187. if (!(flags & format_skip_control_chars))
  3188. writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
  3189. }
  3190. }
  3191. }
  3192. }
  3193. PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3194. {
  3195. if (flags & format_no_escapes)
  3196. writer.write_string(s);
  3197. else
  3198. text_output_escaped(writer, s, type, flags);
  3199. }
  3200. PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
  3201. {
  3202. do
  3203. {
  3204. writer.write('<', '!', '[', 'C', 'D');
  3205. writer.write('A', 'T', 'A', '[');
  3206. const char_t* prev = s;
  3207. // look for ]]> sequence - we can't output it as is since it terminates CDATA
  3208. while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
  3209. // skip ]] if we stopped at ]]>, > will go to the next CDATA section
  3210. if (*s) s += 2;
  3211. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3212. writer.write(']', ']', '>');
  3213. }
  3214. while (*s);
  3215. }
  3216. PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
  3217. {
  3218. switch (indent_length)
  3219. {
  3220. case 1:
  3221. {
  3222. for (unsigned int i = 0; i < depth; ++i)
  3223. writer.write(indent[0]);
  3224. break;
  3225. }
  3226. case 2:
  3227. {
  3228. for (unsigned int i = 0; i < depth; ++i)
  3229. writer.write(indent[0], indent[1]);
  3230. break;
  3231. }
  3232. case 3:
  3233. {
  3234. for (unsigned int i = 0; i < depth; ++i)
  3235. writer.write(indent[0], indent[1], indent[2]);
  3236. break;
  3237. }
  3238. case 4:
  3239. {
  3240. for (unsigned int i = 0; i < depth; ++i)
  3241. writer.write(indent[0], indent[1], indent[2], indent[3]);
  3242. break;
  3243. }
  3244. default:
  3245. {
  3246. for (unsigned int i = 0; i < depth; ++i)
  3247. writer.write_buffer(indent, indent_length);
  3248. }
  3249. }
  3250. }
  3251. PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
  3252. {
  3253. writer.write('<', '!', '-', '-');
  3254. while (*s)
  3255. {
  3256. const char_t* prev = s;
  3257. // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
  3258. while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
  3259. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3260. if (*s)
  3261. {
  3262. assert(*s == '-');
  3263. writer.write('-', ' ');
  3264. ++s;
  3265. }
  3266. }
  3267. writer.write('-', '-', '>');
  3268. }
  3269. PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
  3270. {
  3271. while (*s)
  3272. {
  3273. const char_t* prev = s;
  3274. // look for ?> sequence - we can't output it since ?> terminates PI
  3275. while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
  3276. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3277. if (*s)
  3278. {
  3279. assert(s[0] == '?' && s[1] == '>');
  3280. writer.write('?', ' ', '>');
  3281. s += 2;
  3282. }
  3283. }
  3284. }
  3285. PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3286. {
  3287. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3288. const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
  3289. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3290. {
  3291. if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
  3292. {
  3293. writer.write('\n');
  3294. text_output_indent(writer, indent, indent_length, depth + 1);
  3295. }
  3296. else
  3297. {
  3298. writer.write(' ');
  3299. }
  3300. writer.write_string(a->name ? a->name + 0 : default_name);
  3301. writer.write('=', enquotation_char);
  3302. if (a->value)
  3303. text_output(writer, a->value, ctx_special_attr, flags);
  3304. writer.write(enquotation_char);
  3305. }
  3306. }
  3307. PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3308. {
  3309. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3310. const char_t* name = node->name ? node->name + 0 : default_name;
  3311. writer.write('<');
  3312. writer.write_string(name);
  3313. if (node->first_attribute)
  3314. node_output_attributes(writer, node, indent, indent_length, flags, depth);
  3315. // element nodes can have value if parse_embed_pcdata was used
  3316. if (!node->value)
  3317. {
  3318. if (!node->first_child)
  3319. {
  3320. if (flags & format_no_empty_element_tags)
  3321. {
  3322. writer.write('>', '<', '/');
  3323. writer.write_string(name);
  3324. writer.write('>');
  3325. return false;
  3326. }
  3327. else
  3328. {
  3329. if ((flags & format_raw) == 0)
  3330. writer.write(' ');
  3331. writer.write('/', '>');
  3332. return false;
  3333. }
  3334. }
  3335. else
  3336. {
  3337. writer.write('>');
  3338. return true;
  3339. }
  3340. }
  3341. else
  3342. {
  3343. writer.write('>');
  3344. text_output(writer, node->value, ctx_special_pcdata, flags);
  3345. if (!node->first_child)
  3346. {
  3347. writer.write('<', '/');
  3348. writer.write_string(name);
  3349. writer.write('>');
  3350. return false;
  3351. }
  3352. else
  3353. {
  3354. return true;
  3355. }
  3356. }
  3357. }
  3358. PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
  3359. {
  3360. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3361. const char_t* name = node->name ? node->name + 0 : default_name;
  3362. writer.write('<', '/');
  3363. writer.write_string(name);
  3364. writer.write('>');
  3365. }
  3366. PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
  3367. {
  3368. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3369. switch (PUGI__NODETYPE(node))
  3370. {
  3371. case node_pcdata:
  3372. text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
  3373. break;
  3374. case node_cdata:
  3375. text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3376. break;
  3377. case node_comment:
  3378. node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3379. break;
  3380. case node_pi:
  3381. writer.write('<', '?');
  3382. writer.write_string(node->name ? node->name + 0 : default_name);
  3383. if (node->value)
  3384. {
  3385. writer.write(' ');
  3386. node_output_pi_value(writer, node->value);
  3387. }
  3388. writer.write('?', '>');
  3389. break;
  3390. case node_declaration:
  3391. writer.write('<', '?');
  3392. writer.write_string(node->name ? node->name + 0 : default_name);
  3393. node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
  3394. writer.write('?', '>');
  3395. break;
  3396. case node_doctype:
  3397. writer.write('<', '!', 'D', 'O', 'C');
  3398. writer.write('T', 'Y', 'P', 'E');
  3399. if (node->value)
  3400. {
  3401. writer.write(' ');
  3402. writer.write_string(node->value);
  3403. }
  3404. writer.write('>');
  3405. break;
  3406. default:
  3407. assert(false && "Invalid node type"); // unreachable
  3408. }
  3409. }
  // Bit flags describing the whitespace node_output has to emit before the
  // next node or closing tag.
  enum indent_flags_t
  {
      indent_newline = 1 << 0, // write a line break first
      indent_indent = 1 << 1   // write the indentation for the current depth
  };
  3415. PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
  3416. {
  3417. size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
  3418. unsigned int indent_flags = indent_indent;
  3419. xml_node_struct* node = root;
  3420. do
  3421. {
  3422. assert(node);
  3423. // begin writing current node
  3424. if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
  3425. {
  3426. node_output_simple(writer, node, flags);
  3427. indent_flags = 0;
  3428. }
  3429. else
  3430. {
  3431. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3432. writer.write('\n');
  3433. if ((indent_flags & indent_indent) && indent_length)
  3434. text_output_indent(writer, indent, indent_length, depth);
  3435. if (PUGI__NODETYPE(node) == node_element)
  3436. {
  3437. indent_flags = indent_newline | indent_indent;
  3438. if (node_output_start(writer, node, indent, indent_length, flags, depth))
  3439. {
  3440. // element nodes can have value if parse_embed_pcdata was used
  3441. if (node->value)
  3442. indent_flags = 0;
  3443. node = node->first_child;
  3444. depth++;
  3445. continue;
  3446. }
  3447. }
  3448. else if (PUGI__NODETYPE(node) == node_document)
  3449. {
  3450. indent_flags = indent_indent;
  3451. if (node->first_child)
  3452. {
  3453. node = node->first_child;
  3454. continue;
  3455. }
  3456. }
  3457. else
  3458. {
  3459. node_output_simple(writer, node, flags);
  3460. indent_flags = indent_newline | indent_indent;
  3461. }
  3462. }
  3463. // continue to the next node
  3464. while (node != root)
  3465. {
  3466. if (node->next_sibling)
  3467. {
  3468. node = node->next_sibling;
  3469. break;
  3470. }
  3471. node = node->parent;
  3472. // write closing node
  3473. if (PUGI__NODETYPE(node) == node_element)
  3474. {
  3475. depth--;
  3476. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3477. writer.write('\n');
  3478. if ((indent_flags & indent_indent) && indent_length)
  3479. text_output_indent(writer, indent, indent_length, depth);
  3480. node_output_end(writer, node);
  3481. indent_flags = indent_newline | indent_indent;
  3482. }
  3483. }
  3484. }
  3485. while (node != root);
  3486. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3487. writer.write('\n');
  3488. }
  3489. PUGI__FN bool has_declaration(xml_node_struct* node)
  3490. {
  3491. for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
  3492. {
  3493. xml_node_type type = PUGI__NODETYPE(child);
  3494. if (type == node_declaration) return true;
  3495. if (type == node_element) return false;
  3496. }
  3497. return false;
  3498. }
  3499. PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
  3500. {
  3501. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3502. if (a == attr)
  3503. return true;
  3504. return false;
  3505. }
  3506. PUGI__FN bool allow_insert_attribute(xml_node_type parent)
  3507. {
  3508. return parent == node_element || parent == node_declaration;
  3509. }
  3510. PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
  3511. {
  3512. if (parent != node_document && parent != node_element) return false;
  3513. if (child == node_document || child == node_null) return false;
  3514. if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
  3515. return true;
  3516. }
  3517. PUGI__FN bool allow_move(xml_node parent, xml_node child)
  3518. {
  3519. // check that child can be a child of parent
  3520. if (!allow_insert_child(parent.type(), child.type()))
  3521. return false;
  3522. // check that node is not moved between documents
  3523. if (parent.root() != child.root())
  3524. return false;
  3525. // check that new parent is not in the child subtree
  3526. xml_node cur = parent;
  3527. while (cur)
  3528. {
  3529. if (cur == child)
  3530. return false;
  3531. cur = cur.parent();
  3532. }
  3533. return true;
  3534. }
  3535. template <typename String, typename Header>
  3536. PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
  3537. {
  3538. assert(!dest && (header & header_mask) == 0);
  3539. if (source)
  3540. {
  3541. if (alloc && (source_header & header_mask) == 0)
  3542. {
  3543. dest = source;
  3544. // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
  3545. header |= xml_memory_page_contents_shared_mask;
  3546. source_header |= xml_memory_page_contents_shared_mask;
  3547. }
  3548. else
  3549. strcpy_insitu(dest, header, header_mask, source, strlength(source));
  3550. }
  3551. }
  3552. PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
  3553. {
  3554. node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
  3555. node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
  3556. for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
  3557. {
  3558. xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
  3559. if (da)
  3560. {
  3561. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3562. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3563. }
  3564. }
  3565. }
  3566. PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
  3567. {
  3568. xml_allocator& alloc = get_allocator(dn);
  3569. xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
  3570. node_copy_contents(dn, sn, shared_alloc);
  3571. xml_node_struct* dit = dn;
  3572. xml_node_struct* sit = sn->first_child;
  3573. while (sit && sit != sn)
  3574. {
  3575. // loop invariant: dit is inside the subtree rooted at dn
  3576. assert(dit);
  3577. // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
  3578. if (sit != dn)
  3579. {
  3580. xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
  3581. if (copy)
  3582. {
  3583. node_copy_contents(copy, sit, shared_alloc);
  3584. if (sit->first_child)
  3585. {
  3586. dit = copy;
  3587. sit = sit->first_child;
  3588. continue;
  3589. }
  3590. }
  3591. }
  3592. // continue to the next node
  3593. do
  3594. {
  3595. if (sit->next_sibling)
  3596. {
  3597. sit = sit->next_sibling;
  3598. break;
  3599. }
  3600. sit = sit->parent;
  3601. dit = dit->parent;
  3602. // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
  3603. assert(sit == sn || dit);
  3604. }
  3605. while (sit != sn);
  3606. }
  3607. assert(!sit || dit == dn->parent);
  3608. }
  3609. PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
  3610. {
  3611. xml_allocator& alloc = get_allocator(da);
  3612. xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
  3613. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3614. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3615. }
  3616. inline bool is_text_node(xml_node_struct* node)
  3617. {
  3618. xml_node_type type = PUGI__NODETYPE(node);
  3619. return type == node_pcdata || type == node_cdata;
  3620. }
  3621. // get value with conversion functions
  3622. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
  3623. {
  3624. U result = 0;
  3625. const char_t* s = value;
  3626. while (PUGI__IS_CHARTYPE(*s, ct_space))
  3627. s++;
  3628. bool negative = (*s == '-');
  3629. s += (*s == '+' || *s == '-');
  3630. bool overflow = false;
  3631. if (s[0] == '0' && (s[1] | ' ') == 'x')
  3632. {
  3633. s += 2;
  3634. // since overflow detection relies on length of the sequence skip leading zeros
  3635. while (*s == '0')
  3636. s++;
  3637. const char_t* start = s;
  3638. for (;;)
  3639. {
  3640. if (static_cast<unsigned>(*s - '0') < 10)
  3641. result = result * 16 + (*s - '0');
  3642. else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
  3643. result = result * 16 + ((*s | ' ') - 'a' + 10);
  3644. else
  3645. break;
  3646. s++;
  3647. }
  3648. size_t digits = static_cast<size_t>(s - start);
  3649. overflow = digits > sizeof(U) * 2;
  3650. }
  3651. else
  3652. {
  3653. // since overflow detection relies on length of the sequence skip leading zeros
  3654. while (*s == '0')
  3655. s++;
  3656. const char_t* start = s;
  3657. for (;;)
  3658. {
  3659. if (static_cast<unsigned>(*s - '0') < 10)
  3660. result = result * 10 + (*s - '0');
  3661. else
  3662. break;
  3663. s++;
  3664. }
  3665. size_t digits = static_cast<size_t>(s - start);
  3666. PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
  3667. const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
  3668. const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
  3669. const size_t high_bit = sizeof(U) * 8 - 1;
  3670. overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
  3671. }
  3672. if (negative)
  3673. {
  3674. // Workaround for crayc++ CC-3059: Expected no overflow in routine.
  3675. #ifdef _CRAYC
  3676. return (overflow || result > ~minv + 1) ? minv : ~result + 1;
  3677. #else
  3678. return (overflow || result > 0 - minv) ? minv : 0 - result;
  3679. #endif
  3680. }
  3681. else
  3682. return (overflow || result > maxv) ? maxv : result;
  3683. }
  3684. PUGI__FN int get_value_int(const char_t* value)
  3685. {
  3686. return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
  3687. }
  3688. PUGI__FN unsigned int get_value_uint(const char_t* value)
  3689. {
  3690. return string_to_integer<unsigned int>(value, 0, UINT_MAX);
  3691. }
  3692. PUGI__FN double get_value_double(const char_t* value)
  3693. {
  3694. #ifdef PUGIXML_WCHAR_MODE
  3695. return wcstod(value, 0);
  3696. #else
  3697. return strtod(value, 0);
  3698. #endif
  3699. }
  3700. PUGI__FN float get_value_float(const char_t* value)
  3701. {
  3702. #ifdef PUGIXML_WCHAR_MODE
  3703. return static_cast<float>(wcstod(value, 0));
  3704. #else
  3705. return static_cast<float>(strtod(value, 0));
  3706. #endif
  3707. }
  3708. PUGI__FN bool get_value_bool(const char_t* value)
  3709. {
  3710. // only look at first char
  3711. char_t first = *value;
  3712. // 1*, t* (true), T* (True), y* (yes), Y* (YES)
  3713. return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
  3714. }
  #ifdef PUGIXML_HAS_LONG_LONG
  // Parses value as a signed long long; LLONG_MIN (as unsigned) and LLONG_MAX are
  // the limits passed to string_to_integer.
  PUGI__FN long long get_value_llong(const char_t* value)
  {
      const unsigned long long lowest = static_cast<unsigned long long>(LLONG_MIN);
      const unsigned long long highest = LLONG_MAX;

      return string_to_integer<unsigned long long>(value, lowest, highest);
  }

  // Parses value as an unsigned long long with limits 0 and ULLONG_MAX.
  PUGI__FN unsigned long long get_value_ullong(const char_t* value)
  {
      const unsigned long long lowest = 0;
      const unsigned long long highest = ULLONG_MAX;

      return string_to_integer<unsigned long long>(value, lowest, highest);
  }
  #endif
  3725. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
  3726. {
  3727. char_t* result = end - 1;
  3728. U rest = negative ? 0 - value : value;
  3729. do
  3730. {
  3731. *result-- = static_cast<char_t>('0' + (rest % 10));
  3732. rest /= 10;
  3733. }
  3734. while (rest);
  3735. assert(result >= begin);
  3736. (void)begin;
  3737. *result = '-';
  3738. return result + !negative;
  3739. }
  3740. // set value with conversion functions
  3741. template <typename String, typename Header>
  3742. PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
  3743. {
  3744. #ifdef PUGIXML_WCHAR_MODE
  3745. char_t wbuf[128];
  3746. assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
  3747. size_t offset = 0;
  3748. for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
  3749. return strcpy_insitu(dest, header, header_mask, wbuf, offset);
  3750. #else
  3751. return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
  3752. #endif
  3753. }
  3754. template <typename U, typename String, typename Header>
  3755. PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
  3756. {
  3757. char_t buf[64];
  3758. char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
  3759. char_t* begin = integer_to_string(buf, end, value, negative);
  3760. return strcpy_insitu(dest, header, header_mask, begin, end - begin);
  3761. }
  3762. template <typename String, typename Header>
  3763. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
  3764. {
  3765. char buf[128];
  3766. PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
  3767. return set_value_ascii(dest, header, header_mask, buf);
  3768. }
  3769. template <typename String, typename Header>
  3770. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
  3771. {
  3772. char buf[128];
  3773. PUGI__SNPRINTF(buf, "%.*g", precision, value);
  3774. return set_value_ascii(dest, header, header_mask, buf);
  3775. }
  3776. template <typename String, typename Header>
  3777. PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
  3778. {
  3779. return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
  3780. }
  3781. PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
  3782. {
  3783. // check input buffer
  3784. if (!contents && size) return make_parse_result(status_io_error);
  3785. // get actual encoding
  3786. xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
  3787. // get private buffer
  3788. char_t* buffer = 0;
  3789. size_t length = 0;
  3790. // coverity[var_deref_model]
  3791. if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
  3792. // delete original buffer if we performed a conversion
  3793. if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
  3794. // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
  3795. if (own || buffer != contents) *out_buffer = buffer;
  3796. // store buffer for offset_debug
  3797. doc->buffer = buffer;
  3798. // parse
  3799. xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
  3800. // remember encoding
  3801. res.encoding = buffer_encoding;
  3802. return res;
  3803. }
  3804. // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
  3805. PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
  3806. {
  3807. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  3808. // there are 64-bit versions of fseek/ftell, let's use them
  3809. typedef __int64 length_type;
  3810. _fseeki64(file, 0, SEEK_END);
  3811. length_type length = _ftelli64(file);
  3812. _fseeki64(file, 0, SEEK_SET);
  3813. #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
  3814. // there are 64-bit versions of fseek/ftell, let's use them
  3815. typedef off64_t length_type;
  3816. fseeko64(file, 0, SEEK_END);
  3817. length_type length = ftello64(file);
  3818. fseeko64(file, 0, SEEK_SET);
  3819. #else
  3820. // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
  3821. typedef long length_type;
  3822. fseek(file, 0, SEEK_END);
  3823. length_type length = ftell(file);
  3824. fseek(file, 0, SEEK_SET);
  3825. #endif
  3826. // check for I/O errors
  3827. if (length < 0) return status_io_error;
  3828. // check for overflow
  3829. size_t result = static_cast<size_t>(length);
  3830. if (static_cast<length_type>(result) != length) return status_out_of_memory;
  3831. // finalize
  3832. out_result = result;
  3833. return status_ok;
  3834. }
  3835. // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
  3836. PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
  3837. {
  3838. // We only need to zero-terminate if encoding conversion does not do it for us
  3839. #ifdef PUGIXML_WCHAR_MODE
  3840. xml_encoding wchar_encoding = get_wchar_encoding();
  3841. if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
  3842. {
  3843. size_t length = size / sizeof(char_t);
  3844. static_cast<char_t*>(buffer)[length] = 0;
  3845. return (length + 1) * sizeof(char_t);
  3846. }
  3847. #else
  3848. if (encoding == encoding_utf8)
  3849. {
  3850. static_cast<char*>(buffer)[size] = 0;
  3851. return size + 1;
  3852. }
  3853. #endif
  3854. return size;
  3855. }
  3856. PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3857. {
  3858. if (!file) return make_parse_result(status_file_not_found);
  3859. // get file size (can result in I/O errors)
  3860. size_t size = 0;
  3861. xml_parse_status size_status = get_file_size(file, size);
  3862. if (size_status != status_ok) return make_parse_result(size_status);
  3863. size_t max_suffix_size = sizeof(char_t);
  3864. // allocate buffer for the whole file
  3865. char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
  3866. if (!contents) return make_parse_result(status_out_of_memory);
  3867. // read file in memory
  3868. size_t read_size = fread(contents, 1, size, file);
  3869. if (read_size != size)
  3870. {
  3871. xml_memory::deallocate(contents);
  3872. return make_parse_result(status_io_error);
  3873. }
  3874. xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
  3875. return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3876. }
  3877. PUGI__FN void close_file(FILE* file)
  3878. {
  3879. fclose(file);
  3880. }
  3881. #ifndef PUGIXML_NO_STL
  3882. template <typename T> struct xml_stream_chunk
  3883. {
  3884. static xml_stream_chunk* create()
  3885. {
  3886. void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
  3887. if (!memory) return 0;
  3888. return new (memory) xml_stream_chunk();
  3889. }
  3890. static void destroy(xml_stream_chunk* chunk)
  3891. {
  3892. // free chunk chain
  3893. while (chunk)
  3894. {
  3895. xml_stream_chunk* next_ = chunk->next;
  3896. xml_memory::deallocate(chunk);
  3897. chunk = next_;
  3898. }
  3899. }
  3900. xml_stream_chunk(): next(0), size(0)
  3901. {
  3902. }
  3903. xml_stream_chunk* next;
  3904. size_t size;
  3905. T data[xml_memory_page_size / sizeof(T)];
  3906. };
  3907. template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3908. {
  3909. auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
  3910. // read file to a chunk list
  3911. size_t total = 0;
  3912. xml_stream_chunk<T>* last = 0;
  3913. while (!stream.eof())
  3914. {
  3915. // allocate new chunk
  3916. xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
  3917. if (!chunk) return status_out_of_memory;
  3918. // append chunk to list
  3919. if (last) last = last->next = chunk;
  3920. else chunks.data = last = chunk;
  3921. // read data to chunk
  3922. stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
  3923. chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
  3924. // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
  3925. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3926. // guard against huge files (chunk size is small enough to make this overflow check work)
  3927. if (total + chunk->size < total) return status_out_of_memory;
  3928. total += chunk->size;
  3929. }
  3930. size_t max_suffix_size = sizeof(char_t);
  3931. // copy chunk list to a contiguous buffer
  3932. char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
  3933. if (!buffer) return status_out_of_memory;
  3934. char* write = buffer;
  3935. for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
  3936. {
  3937. assert(write + chunk->size <= buffer + total);
  3938. memcpy(write, chunk->data, chunk->size);
  3939. write += chunk->size;
  3940. }
  3941. assert(write == buffer + total);
  3942. // return buffer
  3943. *out_buffer = buffer;
  3944. *out_size = total;
  3945. return status_ok;
  3946. }
  3947. template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3948. {
  3949. // get length of remaining data in stream
  3950. typename std::basic_istream<T>::pos_type pos = stream.tellg();
  3951. stream.seekg(0, std::ios::end);
  3952. std::streamoff length = stream.tellg() - pos;
  3953. stream.seekg(pos);
  3954. if (stream.fail() || pos < 0) return status_io_error;
  3955. // guard against huge files
  3956. size_t read_length = static_cast<size_t>(length);
  3957. if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
  3958. size_t max_suffix_size = sizeof(char_t);
  3959. // read stream data into memory (guard against stream exceptions with buffer holder)
  3960. auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
  3961. if (!buffer.data) return status_out_of_memory;
  3962. stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
  3963. // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
  3964. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3965. // return buffer
  3966. size_t actual_length = static_cast<size_t>(stream.gcount());
  3967. assert(actual_length <= read_length);
  3968. *out_buffer = buffer.release();
  3969. *out_size = actual_length * sizeof(T);
  3970. return status_ok;
  3971. }
  3972. template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3973. {
  3974. void* buffer = 0;
  3975. size_t size = 0;
  3976. xml_parse_status status = status_ok;
  3977. // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
  3978. if (stream.fail()) return make_parse_result(status_io_error);
  3979. // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
  3980. if (stream.tellg() < 0)
  3981. {
  3982. stream.clear(); // clear error flags that could be set by a failing tellg
  3983. status = load_stream_data_noseek(stream, &buffer, &size);
  3984. }
  3985. else
  3986. status = load_stream_data_seek(stream, &buffer, &size);
  3987. if (status != status_ok) return make_parse_result(status);
  3988. xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
  3989. return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3990. }
  3991. #endif
  3992. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
  3993. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  3994. {
  3995. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  3996. FILE* file = 0;
  3997. return _wfopen_s(&file, path, mode) == 0 ? file : 0;
  3998. #else
  3999. return _wfopen(path, mode);
  4000. #endif
  4001. }
  4002. #else
  4003. PUGI__FN char* convert_path_heap(const wchar_t* str)
  4004. {
  4005. assert(str);
  4006. // first pass: get length in utf8 characters
  4007. size_t length = strlength_wide(str);
  4008. size_t size = as_utf8_begin(str, length);
  4009. // allocate resulting string
  4010. char* result = static_cast<char*>(xml_memory::allocate(size + 1));
  4011. if (!result) return 0;
  4012. // second pass: convert to utf8
  4013. as_utf8_end(result, size, str, length);
  4014. // zero-terminate
  4015. result[size] = 0;
  4016. return result;
  4017. }
  4018. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4019. {
  4020. // there is no standard function to open wide paths, so our best bet is to try utf8 path
  4021. char* path_utf8 = convert_path_heap(path);
  4022. if (!path_utf8) return 0;
  4023. // convert mode to ASCII (we mirror _wfopen interface)
  4024. char mode_ascii[4] = {0};
  4025. for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
  4026. // try to open the utf8 path
  4027. FILE* result = fopen(path_utf8, mode_ascii);
  4028. // free dummy buffer
  4029. xml_memory::deallocate(path_utf8);
  4030. return result;
  4031. }
  4032. #endif
  4033. PUGI__FN FILE* open_file(const char* path, const char* mode)
  4034. {
  4035. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  4036. FILE* file = 0;
  4037. return fopen_s(&file, path, mode) == 0 ? file : 0;
  4038. #else
  4039. return fopen(path, mode);
  4040. #endif
  4041. }
  4042. PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
  4043. {
  4044. if (!file) return false;
  4045. xml_writer_file writer(file);
  4046. doc.save(writer, indent, flags, encoding);
  4047. return ferror(file) == 0;
  4048. }
  4049. struct name_null_sentry
  4050. {
  4051. xml_node_struct* node;
  4052. char_t* name;
  4053. name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
  4054. {
  4055. node->name = 0;
  4056. }
  4057. ~name_null_sentry()
  4058. {
  4059. node->name = name;
  4060. }
  4061. };
  4062. PUGI__NS_END
  4063. namespace pugi
  4064. {
  4065. PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
  4066. {
  4067. }
  4068. PUGI__FN void xml_writer_file::write(const void* data, size_t size)
  4069. {
  4070. size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
  4071. (void)!result; // unfortunately we can't do proper error handling here
  4072. }
  4073. #ifndef PUGIXML_NO_STL
  4074. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
  4075. {
  4076. }
  4077. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
  4078. {
  4079. }
  4080. PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
  4081. {
  4082. if (narrow_stream)
  4083. {
  4084. assert(!wide_stream);
  4085. narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
  4086. }
  4087. else
  4088. {
  4089. assert(wide_stream);
  4090. assert(size % sizeof(wchar_t) == 0);
  4091. wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
  4092. }
  4093. }
  4094. #endif
  4095. PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
  4096. {
  4097. }
  4098. PUGI__FN xml_tree_walker::~xml_tree_walker()
  4099. {
  4100. }
  4101. PUGI__FN int xml_tree_walker::depth() const
  4102. {
  4103. return _depth;
  4104. }
  4105. PUGI__FN bool xml_tree_walker::begin(xml_node&)
  4106. {
  4107. return true;
  4108. }
  4109. PUGI__FN bool xml_tree_walker::end(xml_node&)
  4110. {
  4111. return true;
  4112. }
  4113. PUGI__FN xml_attribute::xml_attribute(): _attr(0)
  4114. {
  4115. }
  4116. PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
  4117. {
  4118. }
  4119. PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
  4120. {
  4121. }
  4122. PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
  4123. {
  4124. return _attr ? unspecified_bool_xml_attribute : 0;
  4125. }
  4126. PUGI__FN bool xml_attribute::operator!() const
  4127. {
  4128. return !_attr;
  4129. }
  4130. PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
  4131. {
  4132. return (_attr == r._attr);
  4133. }
  4134. PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
  4135. {
  4136. return (_attr != r._attr);
  4137. }
  4138. PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
  4139. {
  4140. return (_attr < r._attr);
  4141. }
  4142. PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
  4143. {
  4144. return (_attr > r._attr);
  4145. }
  4146. PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
  4147. {
  4148. return (_attr <= r._attr);
  4149. }
  4150. PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
  4151. {
  4152. return (_attr >= r._attr);
  4153. }
  4154. PUGI__FN xml_attribute xml_attribute::next_attribute() const
  4155. {
  4156. return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
  4157. }
  4158. PUGI__FN xml_attribute xml_attribute::previous_attribute() const
  4159. {
  4160. return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
  4161. }
  4162. PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
  4163. {
  4164. return (_attr && _attr->value) ? _attr->value + 0 : def;
  4165. }
  4166. PUGI__FN int xml_attribute::as_int(int def) const
  4167. {
  4168. return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
  4169. }
  4170. PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
  4171. {
  4172. return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
  4173. }
  4174. PUGI__FN double xml_attribute::as_double(double def) const
  4175. {
  4176. return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
  4177. }
  4178. PUGI__FN float xml_attribute::as_float(float def) const
  4179. {
  4180. return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
  4181. }
  4182. PUGI__FN bool xml_attribute::as_bool(bool def) const
  4183. {
  4184. return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
  4185. }
  #ifdef PUGIXML_HAS_LONG_LONG
  // Value converted to long long, or def when the handle is empty or has no value.
  PUGI__FN long long xml_attribute::as_llong(long long def) const
  {
      if (_attr && _attr->value) return impl::get_value_llong(_attr->value);

      return def;
  }

  // Value converted to unsigned long long, or def when the handle is empty or has no value.
  PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
  {
      if (_attr && _attr->value) return impl::get_value_ullong(_attr->value);

      return def;
  }
  #endif
  4196. PUGI__FN bool xml_attribute::empty() const
  4197. {
  4198. return !_attr;
  4199. }
  4200. PUGI__FN const char_t* xml_attribute::name() const
  4201. {
  4202. return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
  4203. }
  4204. PUGI__FN const char_t* xml_attribute::value() const
  4205. {
  4206. return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
  4207. }
  4208. PUGI__FN size_t xml_attribute::hash_value() const
  4209. {
  4210. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
  4211. }
  4212. PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
  4213. {
  4214. return _attr;
  4215. }
  4216. PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
  4217. {
  4218. set_value(rhs);
  4219. return *this;
  4220. }
  4221. PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
  4222. {
  4223. set_value(rhs);
  4224. return *this;
  4225. }
  4226. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
  4227. {
  4228. set_value(rhs);
  4229. return *this;
  4230. }
  4231. PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
  4232. {
  4233. set_value(rhs);
  4234. return *this;
  4235. }
  4236. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
  4237. {
  4238. set_value(rhs);
  4239. return *this;
  4240. }
  4241. PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
  4242. {
  4243. set_value(rhs);
  4244. return *this;
  4245. }
  4246. PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
  4247. {
  4248. set_value(rhs);
  4249. return *this;
  4250. }
  4251. PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
  4252. {
  4253. set_value(rhs);
  4254. return *this;
  4255. }
  #ifdef PUGIXML_HAS_LONG_LONG
  // Assignment from long long: forwards to set_value and returns *this.
  PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
  {
      set_value(rhs);

      return *this;
  }

  // Assignment from unsigned long long: forwards to set_value and returns *this.
  PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
  {
      set_value(rhs);

      return *this;
  }
  #endif
  4268. PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
  4269. {
  4270. if (!_attr) return false;
  4271. return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4272. }
  4273. PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
  4274. {
  4275. if (!_attr) return false;
  4276. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4277. }
  4278. PUGI__FN bool xml_attribute::set_value(int rhs)
  4279. {
  4280. if (!_attr) return false;
  4281. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4282. }
  4283. PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
  4284. {
  4285. if (!_attr) return false;
  4286. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4287. }
  4288. PUGI__FN bool xml_attribute::set_value(long rhs)
  4289. {
  4290. if (!_attr) return false;
  4291. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4292. }
  4293. PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
  4294. {
  4295. if (!_attr) return false;
  4296. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4297. }
  4298. PUGI__FN bool xml_attribute::set_value(double rhs)
  4299. {
  4300. if (!_attr) return false;
  4301. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
  4302. }
  4303. PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
  4304. {
  4305. if (!_attr) return false;
  4306. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4307. }
  4308. PUGI__FN bool xml_attribute::set_value(float rhs)
  4309. {
  4310. if (!_attr) return false;
  4311. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
  4312. }
  4313. PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
  4314. {
  4315. if (!_attr) return false;
  4316. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4317. }
  4318. PUGI__FN bool xml_attribute::set_value(bool rhs)
  4319. {
  4320. if (!_attr) return false;
  4321. return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
  4322. }
  #ifdef PUGIXML_HAS_LONG_LONG
  // Stores a long long as decimal text. Returns false when the handle is empty.
  PUGI__FN bool xml_attribute::set_value(long long rhs)
  {
      if (_attr)
          return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);

      return false;
  }

  // Stores an unsigned long long as decimal text. Returns false when the handle is empty.
  PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
  {
      if (_attr)
          return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);

      return false;
  }
  #endif
  #ifdef __BORLANDC__
  // Borland-only helper: logical AND of an attribute handle (converted to bool) with a bool.
  PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
  {
      if (!(bool)lhs) return false;

      return rhs;
  }

  // Borland-only helper: logical OR of an attribute handle (converted to bool) with a bool.
  PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
  {
      if ((bool)lhs) return true;

      return rhs;
  }
  #endif
  4345. PUGI__FN xml_node::xml_node(): _root(0)
  4346. {
  4347. }
  4348. PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
  4349. {
  4350. }
  4351. PUGI__FN static void unspecified_bool_xml_node(xml_node***)
  4352. {
  4353. }
  4354. PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
  4355. {
  4356. return _root ? unspecified_bool_xml_node : 0;
  4357. }
  4358. PUGI__FN bool xml_node::operator!() const
  4359. {
  4360. return !_root;
  4361. }
  4362. PUGI__FN xml_node::iterator xml_node::begin() const
  4363. {
  4364. return iterator(_root ? _root->first_child + 0 : 0, _root);
  4365. }
  4366. PUGI__FN xml_node::iterator xml_node::end() const
  4367. {
  4368. return iterator(0, _root);
  4369. }
  4370. PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
  4371. {
  4372. return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
  4373. }
  4374. PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
  4375. {
  4376. return attribute_iterator(0, _root);
  4377. }
  4378. PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
  4379. {
  4380. return xml_object_range<xml_node_iterator>(begin(), end());
  4381. }
  4382. PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
  4383. {
  4384. return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
  4385. }
  4386. PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
  4387. {
  4388. return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
  4389. }
  4390. PUGI__FN bool xml_node::operator==(const xml_node& r) const
  4391. {
  4392. return (_root == r._root);
  4393. }
  4394. PUGI__FN bool xml_node::operator!=(const xml_node& r) const
  4395. {
  4396. return (_root != r._root);
  4397. }
  4398. PUGI__FN bool xml_node::operator<(const xml_node& r) const
  4399. {
  4400. return (_root < r._root);
  4401. }
  4402. PUGI__FN bool xml_node::operator>(const xml_node& r) const
  4403. {
  4404. return (_root > r._root);
  4405. }
  4406. PUGI__FN bool xml_node::operator<=(const xml_node& r) const
  4407. {
  4408. return (_root <= r._root);
  4409. }
  4410. PUGI__FN bool xml_node::operator>=(const xml_node& r) const
  4411. {
  4412. return (_root >= r._root);
  4413. }
  4414. PUGI__FN bool xml_node::empty() const
  4415. {
  4416. return !_root;
  4417. }
  4418. PUGI__FN const char_t* xml_node::name() const
  4419. {
  4420. return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
  4421. }
  4422. PUGI__FN xml_node_type xml_node::type() const
  4423. {
  4424. return _root ? PUGI__NODETYPE(_root) : node_null;
  4425. }
  4426. PUGI__FN const char_t* xml_node::value() const
  4427. {
  4428. return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
  4429. }
  4430. PUGI__FN xml_node xml_node::child(const char_t* name_) const
  4431. {
  4432. if (!_root) return xml_node();
  4433. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4434. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4435. return xml_node();
  4436. }
  4437. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
  4438. {
  4439. if (!_root) return xml_attribute();
  4440. for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
  4441. if (i->name && impl::strequal(name_, i->name))
  4442. return xml_attribute(i);
  4443. return xml_attribute();
  4444. }
  4445. PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
  4446. {
  4447. if (!_root) return xml_node();
  4448. for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
  4449. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4450. return xml_node();
  4451. }
  4452. PUGI__FN xml_node xml_node::next_sibling() const
  4453. {
  4454. return _root ? xml_node(_root->next_sibling) : xml_node();
  4455. }
  4456. PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
  4457. {
  4458. if (!_root) return xml_node();
  4459. for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
  4460. if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
  4461. return xml_node();
  4462. }
  4463. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
  4464. {
  4465. xml_attribute_struct* hint = hint_._attr;
  4466. // if hint is not an attribute of node, behavior is not defined
  4467. assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
  4468. if (!_root) return xml_attribute();
  4469. // optimistically search from hint up until the end
  4470. for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
  4471. if (i->name && impl::strequal(name_, i->name))
  4472. {
  4473. // update hint to maximize efficiency of searching for consecutive attributes
  4474. hint_._attr = i->next_attribute;
  4475. return xml_attribute(i);
  4476. }
  4477. // wrap around and search from the first attribute until the hint
  4478. // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
  4479. for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
  4480. if (j->name && impl::strequal(name_, j->name))
  4481. {
  4482. // update hint to maximize efficiency of searching for consecutive attributes
  4483. hint_._attr = j->next_attribute;
  4484. return xml_attribute(j);
  4485. }
  4486. return xml_attribute();
  4487. }
  4488. PUGI__FN xml_node xml_node::previous_sibling() const
  4489. {
  4490. if (!_root) return xml_node();
  4491. if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
  4492. else return xml_node();
  4493. }
  4494. PUGI__FN xml_node xml_node::parent() const
  4495. {
  4496. return _root ? xml_node(_root->parent) : xml_node();
  4497. }
  4498. PUGI__FN xml_node xml_node::root() const
  4499. {
  4500. return _root ? xml_node(&impl::get_document(_root)) : xml_node();
  4501. }
  4502. PUGI__FN xml_text xml_node::text() const
  4503. {
  4504. return xml_text(_root);
  4505. }
  4506. PUGI__FN const char_t* xml_node::child_value() const
  4507. {
  4508. if (!_root) return PUGIXML_TEXT("");
  4509. // element nodes can have value if parse_embed_pcdata was used
  4510. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  4511. return _root->value;
  4512. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4513. if (impl::is_text_node(i) && i->value)
  4514. return i->value;
  4515. return PUGIXML_TEXT("");
  4516. }
  4517. PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
  4518. {
  4519. return child(name_).child_value();
  4520. }
  4521. PUGI__FN xml_attribute xml_node::first_attribute() const
  4522. {
  4523. return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
  4524. }
  4525. PUGI__FN xml_attribute xml_node::last_attribute() const
  4526. {
  4527. return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
  4528. }
  4529. PUGI__FN xml_node xml_node::first_child() const
  4530. {
  4531. return _root ? xml_node(_root->first_child) : xml_node();
  4532. }
  4533. PUGI__FN xml_node xml_node::last_child() const
  4534. {
  4535. return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
  4536. }
  4537. PUGI__FN bool xml_node::set_name(const char_t* rhs)
  4538. {
  4539. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4540. if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
  4541. return false;
  4542. return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4543. }
  4544. PUGI__FN bool xml_node::set_value(const char_t* rhs)
  4545. {
  4546. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4547. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4548. return false;
  4549. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4550. }
  4551. PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
  4552. {
  4553. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4554. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4555. if (!alloc.reserve()) return xml_attribute();
  4556. xml_attribute a(impl::allocate_attribute(alloc));
  4557. if (!a) return xml_attribute();
  4558. impl::append_attribute(a._attr, _root);
  4559. a.set_name(name_);
  4560. return a;
  4561. }
  4562. PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
  4563. {
  4564. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4565. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4566. if (!alloc.reserve()) return xml_attribute();
  4567. xml_attribute a(impl::allocate_attribute(alloc));
  4568. if (!a) return xml_attribute();
  4569. impl::prepend_attribute(a._attr, _root);
  4570. a.set_name(name_);
  4571. return a;
  4572. }
  4573. PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
  4574. {
  4575. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4576. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4577. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4578. if (!alloc.reserve()) return xml_attribute();
  4579. xml_attribute a(impl::allocate_attribute(alloc));
  4580. if (!a) return xml_attribute();
  4581. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4582. a.set_name(name_);
  4583. return a;
  4584. }
  4585. PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
  4586. {
  4587. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4588. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4589. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4590. if (!alloc.reserve()) return xml_attribute();
  4591. xml_attribute a(impl::allocate_attribute(alloc));
  4592. if (!a) return xml_attribute();
  4593. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4594. a.set_name(name_);
  4595. return a;
  4596. }
  4597. PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
  4598. {
  4599. if (!proto) return xml_attribute();
  4600. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4601. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4602. if (!alloc.reserve()) return xml_attribute();
  4603. xml_attribute a(impl::allocate_attribute(alloc));
  4604. if (!a) return xml_attribute();
  4605. impl::append_attribute(a._attr, _root);
  4606. impl::node_copy_attribute(a._attr, proto._attr);
  4607. return a;
  4608. }
  4609. PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
  4610. {
  4611. if (!proto) return xml_attribute();
  4612. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4613. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4614. if (!alloc.reserve()) return xml_attribute();
  4615. xml_attribute a(impl::allocate_attribute(alloc));
  4616. if (!a) return xml_attribute();
  4617. impl::prepend_attribute(a._attr, _root);
  4618. impl::node_copy_attribute(a._attr, proto._attr);
  4619. return a;
  4620. }
  4621. PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
  4622. {
  4623. if (!proto) return xml_attribute();
  4624. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4625. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4626. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4627. if (!alloc.reserve()) return xml_attribute();
  4628. xml_attribute a(impl::allocate_attribute(alloc));
  4629. if (!a) return xml_attribute();
  4630. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4631. impl::node_copy_attribute(a._attr, proto._attr);
  4632. return a;
  4633. }
  4634. PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
  4635. {
  4636. if (!proto) return xml_attribute();
  4637. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4638. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4639. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4640. if (!alloc.reserve()) return xml_attribute();
  4641. xml_attribute a(impl::allocate_attribute(alloc));
  4642. if (!a) return xml_attribute();
  4643. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4644. impl::node_copy_attribute(a._attr, proto._attr);
  4645. return a;
  4646. }
  4647. PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
  4648. {
  4649. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4650. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4651. if (!alloc.reserve()) return xml_node();
  4652. xml_node n(impl::allocate_node(alloc, type_));
  4653. if (!n) return xml_node();
  4654. impl::append_node(n._root, _root);
  4655. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4656. return n;
  4657. }
  4658. PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
  4659. {
  4660. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4661. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4662. if (!alloc.reserve()) return xml_node();
  4663. xml_node n(impl::allocate_node(alloc, type_));
  4664. if (!n) return xml_node();
  4665. impl::prepend_node(n._root, _root);
  4666. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4667. return n;
  4668. }
  4669. PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
  4670. {
  4671. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4672. if (!node._root || node._root->parent != _root) return xml_node();
  4673. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4674. if (!alloc.reserve()) return xml_node();
  4675. xml_node n(impl::allocate_node(alloc, type_));
  4676. if (!n) return xml_node();
  4677. impl::insert_node_before(n._root, node._root);
  4678. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4679. return n;
  4680. }
  4681. PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
  4682. {
  4683. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4684. if (!node._root || node._root->parent != _root) return xml_node();
  4685. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4686. if (!alloc.reserve()) return xml_node();
  4687. xml_node n(impl::allocate_node(alloc, type_));
  4688. if (!n) return xml_node();
  4689. impl::insert_node_after(n._root, node._root);
  4690. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4691. return n;
  4692. }
  4693. PUGI__FN xml_node xml_node::append_child(const char_t* name_)
  4694. {
  4695. xml_node result = append_child(node_element);
  4696. result.set_name(name_);
  4697. return result;
  4698. }
  4699. PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
  4700. {
  4701. xml_node result = prepend_child(node_element);
  4702. result.set_name(name_);
  4703. return result;
  4704. }
  4705. PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
  4706. {
  4707. xml_node result = insert_child_after(node_element, node);
  4708. result.set_name(name_);
  4709. return result;
  4710. }
  4711. PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
  4712. {
  4713. xml_node result = insert_child_before(node_element, node);
  4714. result.set_name(name_);
  4715. return result;
  4716. }
  4717. PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
  4718. {
  4719. xml_node_type type_ = proto.type();
  4720. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4721. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4722. if (!alloc.reserve()) return xml_node();
  4723. xml_node n(impl::allocate_node(alloc, type_));
  4724. if (!n) return xml_node();
  4725. impl::append_node(n._root, _root);
  4726. impl::node_copy_tree(n._root, proto._root);
  4727. return n;
  4728. }
  4729. PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
  4730. {
  4731. xml_node_type type_ = proto.type();
  4732. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4733. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4734. if (!alloc.reserve()) return xml_node();
  4735. xml_node n(impl::allocate_node(alloc, type_));
  4736. if (!n) return xml_node();
  4737. impl::prepend_node(n._root, _root);
  4738. impl::node_copy_tree(n._root, proto._root);
  4739. return n;
  4740. }
  4741. PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
  4742. {
  4743. xml_node_type type_ = proto.type();
  4744. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4745. if (!node._root || node._root->parent != _root) return xml_node();
  4746. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4747. if (!alloc.reserve()) return xml_node();
  4748. xml_node n(impl::allocate_node(alloc, type_));
  4749. if (!n) return xml_node();
  4750. impl::insert_node_after(n._root, node._root);
  4751. impl::node_copy_tree(n._root, proto._root);
  4752. return n;
  4753. }
  4754. PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
  4755. {
  4756. xml_node_type type_ = proto.type();
  4757. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4758. if (!node._root || node._root->parent != _root) return xml_node();
  4759. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4760. if (!alloc.reserve()) return xml_node();
  4761. xml_node n(impl::allocate_node(alloc, type_));
  4762. if (!n) return xml_node();
  4763. impl::insert_node_before(n._root, node._root);
  4764. impl::node_copy_tree(n._root, proto._root);
  4765. return n;
  4766. }
  4767. PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
  4768. {
  4769. if (!impl::allow_move(*this, moved)) return xml_node();
  4770. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4771. if (!alloc.reserve()) return xml_node();
  4772. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4773. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4774. impl::remove_node(moved._root);
  4775. impl::append_node(moved._root, _root);
  4776. return moved;
  4777. }
  4778. PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
  4779. {
  4780. if (!impl::allow_move(*this, moved)) return xml_node();
  4781. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4782. if (!alloc.reserve()) return xml_node();
  4783. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4784. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4785. impl::remove_node(moved._root);
  4786. impl::prepend_node(moved._root, _root);
  4787. return moved;
  4788. }
  4789. PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
  4790. {
  4791. if (!impl::allow_move(*this, moved)) return xml_node();
  4792. if (!node._root || node._root->parent != _root) return xml_node();
  4793. if (moved._root == node._root) return xml_node();
  4794. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4795. if (!alloc.reserve()) return xml_node();
  4796. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4797. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4798. impl::remove_node(moved._root);
  4799. impl::insert_node_after(moved._root, node._root);
  4800. return moved;
  4801. }
  4802. PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
  4803. {
  4804. if (!impl::allow_move(*this, moved)) return xml_node();
  4805. if (!node._root || node._root->parent != _root) return xml_node();
  4806. if (moved._root == node._root) return xml_node();
  4807. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4808. if (!alloc.reserve()) return xml_node();
  4809. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4810. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4811. impl::remove_node(moved._root);
  4812. impl::insert_node_before(moved._root, node._root);
  4813. return moved;
  4814. }
  4815. PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
  4816. {
  4817. return remove_attribute(attribute(name_));
  4818. }
  4819. PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
  4820. {
  4821. if (!_root || !a._attr) return false;
  4822. if (!impl::is_attribute_of(a._attr, _root)) return false;
  4823. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4824. if (!alloc.reserve()) return false;
  4825. impl::remove_attribute(a._attr, _root);
  4826. impl::destroy_attribute(a._attr, alloc);
  4827. return true;
  4828. }
  4829. PUGI__FN bool xml_node::remove_attributes()
  4830. {
  4831. if (!_root) return false;
  4832. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4833. if (!alloc.reserve()) return false;
  4834. for (xml_attribute_struct* attr = _root->first_attribute; attr; )
  4835. {
  4836. xml_attribute_struct* next = attr->next_attribute;
  4837. impl::destroy_attribute(attr, alloc);
  4838. attr = next;
  4839. }
  4840. _root->first_attribute = 0;
  4841. return true;
  4842. }
  4843. PUGI__FN bool xml_node::remove_child(const char_t* name_)
  4844. {
  4845. return remove_child(child(name_));
  4846. }
  4847. PUGI__FN bool xml_node::remove_child(const xml_node& n)
  4848. {
  4849. if (!_root || !n._root || n._root->parent != _root) return false;
  4850. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4851. if (!alloc.reserve()) return false;
  4852. impl::remove_node(n._root);
  4853. impl::destroy_node(n._root, alloc);
  4854. return true;
  4855. }
  4856. PUGI__FN bool xml_node::remove_children()
  4857. {
  4858. if (!_root) return false;
  4859. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4860. if (!alloc.reserve()) return false;
  4861. for (xml_node_struct* cur = _root->first_child; cur; )
  4862. {
  4863. xml_node_struct* next = cur->next_sibling;
  4864. impl::destroy_node(cur, alloc);
  4865. cur = next;
  4866. }
  4867. _root->first_child = 0;
  4868. return true;
  4869. }
  4870. PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  4871. {
  4872. // append_buffer is only valid for elements/documents
  4873. if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
  4874. // get document node
  4875. impl::xml_document_struct* doc = &impl::get_document(_root);
  4876. // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
  4877. doc->header |= impl::xml_memory_page_contents_shared_mask;
  4878. // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
  4879. impl::xml_memory_page* page = 0;
  4880. impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
  4881. (void)page;
  4882. if (!extra) return impl::make_parse_result(status_out_of_memory);
  4883. #ifdef PUGIXML_COMPACT
  4884. // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
  4885. // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
  4886. extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
  4887. #endif
  4888. // add extra buffer to the list
  4889. extra->buffer = 0;
  4890. extra->next = doc->extra_buffers;
  4891. doc->extra_buffers = extra;
  4892. // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
  4893. impl::name_null_sentry sentry(_root);
  4894. return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
  4895. }
  4896. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
  4897. {
  4898. if (!_root) return xml_node();
  4899. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4900. if (i->name && impl::strequal(name_, i->name))
  4901. {
  4902. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  4903. if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
  4904. return xml_node(i);
  4905. }
  4906. return xml_node();
  4907. }
  4908. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
  4909. {
  4910. if (!_root) return xml_node();
  4911. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4912. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  4913. if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
  4914. return xml_node(i);
  4915. return xml_node();
  4916. }
  4917. #ifndef PUGIXML_NO_STL
  4918. PUGI__FN string_t xml_node::path(char_t delimiter) const
  4919. {
  4920. if (!_root) return string_t();
  4921. size_t offset = 0;
  4922. for (xml_node_struct* i = _root; i; i = i->parent)
  4923. {
  4924. offset += (i != _root);
  4925. offset += i->name ? impl::strlength(i->name) : 0;
  4926. }
  4927. string_t result;
  4928. result.resize(offset);
  4929. for (xml_node_struct* j = _root; j; j = j->parent)
  4930. {
  4931. if (j != _root)
  4932. result[--offset] = delimiter;
  4933. if (j->name)
  4934. {
  4935. size_t length = impl::strlength(j->name);
  4936. offset -= length;
  4937. memcpy(&result[offset], j->name, length * sizeof(char_t));
  4938. }
  4939. }
  4940. assert(offset == 0);
  4941. return result;
  4942. }
  4943. #endif
  4944. PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
  4945. {
  4946. xml_node context = path_[0] == delimiter ? root() : *this;
  4947. if (!context._root) return xml_node();
  4948. const char_t* path_segment = path_;
  4949. while (*path_segment == delimiter) ++path_segment;
  4950. const char_t* path_segment_end = path_segment;
  4951. while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
  4952. if (path_segment == path_segment_end) return context;
  4953. const char_t* next_segment = path_segment_end;
  4954. while (*next_segment == delimiter) ++next_segment;
  4955. if (*path_segment == '.' && path_segment + 1 == path_segment_end)
  4956. return context.first_element_by_path(next_segment, delimiter);
  4957. else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
  4958. return context.parent().first_element_by_path(next_segment, delimiter);
  4959. else
  4960. {
  4961. for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
  4962. {
  4963. if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
  4964. {
  4965. xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
  4966. if (subsearch) return subsearch;
  4967. }
  4968. }
  4969. return xml_node();
  4970. }
  4971. }
  4972. PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
  4973. {
  4974. walker._depth = -1;
  4975. xml_node arg_begin(_root);
  4976. if (!walker.begin(arg_begin)) return false;
  4977. xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
  4978. if (cur)
  4979. {
  4980. ++walker._depth;
  4981. do
  4982. {
  4983. xml_node arg_for_each(cur);
  4984. if (!walker.for_each(arg_for_each))
  4985. return false;
  4986. if (cur->first_child)
  4987. {
  4988. ++walker._depth;
  4989. cur = cur->first_child;
  4990. }
  4991. else if (cur->next_sibling)
  4992. cur = cur->next_sibling;
  4993. else
  4994. {
  4995. while (!cur->next_sibling && cur != _root && cur->parent)
  4996. {
  4997. --walker._depth;
  4998. cur = cur->parent;
  4999. }
  5000. if (cur != _root)
  5001. cur = cur->next_sibling;
  5002. }
  5003. }
  5004. while (cur && cur != _root);
  5005. }
  5006. assert(walker._depth == -1);
  5007. xml_node arg_end(_root);
  5008. return walker.end(arg_end);
  5009. }
  5010. PUGI__FN size_t xml_node::hash_value() const
  5011. {
  5012. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
  5013. }
  5014. PUGI__FN xml_node_struct* xml_node::internal_object() const
  5015. {
  5016. return _root;
  5017. }
  5018. PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5019. {
  5020. if (!_root) return;
  5021. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5022. impl::node_output(buffered_writer, _root, indent, flags, depth);
  5023. buffered_writer.flush();
  5024. }
  5025. #ifndef PUGIXML_NO_STL
  5026. PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5027. {
  5028. xml_writer_stream writer(stream);
  5029. print(writer, indent, flags, encoding, depth);
  5030. }
  5031. PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
  5032. {
  5033. xml_writer_stream writer(stream);
  5034. print(writer, indent, flags, encoding_wchar, depth);
  5035. }
  5036. #endif
  5037. PUGI__FN ptrdiff_t xml_node::offset_debug() const
  5038. {
  5039. if (!_root) return -1;
  5040. impl::xml_document_struct& doc = impl::get_document(_root);
  5041. // we can determine the offset reliably only if there is exactly once parse buffer
  5042. if (!doc.buffer || doc.extra_buffers) return -1;
  5043. switch (type())
  5044. {
  5045. case node_document:
  5046. return 0;
  5047. case node_element:
  5048. case node_declaration:
  5049. case node_pi:
  5050. return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
  5051. case node_pcdata:
  5052. case node_cdata:
  5053. case node_comment:
  5054. case node_doctype:
  5055. return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
  5056. default:
  5057. assert(false && "Invalid node type"); // unreachable
  5058. return -1;
  5059. }
  5060. }
  5061. #ifdef __BORLANDC__
  5062. PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
  5063. {
  5064. return (bool)lhs && rhs;
  5065. }
  5066. PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
  5067. {
  5068. return (bool)lhs || rhs;
  5069. }
  5070. #endif
  5071. PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
  5072. {
  5073. }
  5074. PUGI__FN xml_node_struct* xml_text::_data() const
  5075. {
  5076. if (!_root || impl::is_text_node(_root)) return _root;
  5077. // element nodes can have value if parse_embed_pcdata was used
  5078. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  5079. return _root;
  5080. for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
  5081. if (impl::is_text_node(node))
  5082. return node;
  5083. return 0;
  5084. }
  5085. PUGI__FN xml_node_struct* xml_text::_data_new()
  5086. {
  5087. xml_node_struct* d = _data();
  5088. if (d) return d;
  5089. return xml_node(_root).append_child(node_pcdata).internal_object();
  5090. }
  5091. PUGI__FN xml_text::xml_text(): _root(0)
  5092. {
  5093. }
  5094. PUGI__FN static void unspecified_bool_xml_text(xml_text***)
  5095. {
  5096. }
  5097. PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
  5098. {
  5099. return _data() ? unspecified_bool_xml_text : 0;
  5100. }
  5101. PUGI__FN bool xml_text::operator!() const
  5102. {
  5103. return !_data();
  5104. }
  5105. PUGI__FN bool xml_text::empty() const
  5106. {
  5107. return _data() == 0;
  5108. }
  5109. PUGI__FN const char_t* xml_text::get() const
  5110. {
  5111. xml_node_struct* d = _data();
  5112. return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
  5113. }
  5114. PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
  5115. {
  5116. xml_node_struct* d = _data();
  5117. return (d && d->value) ? d->value + 0 : def;
  5118. }
  5119. PUGI__FN int xml_text::as_int(int def) const
  5120. {
  5121. xml_node_struct* d = _data();
  5122. return (d && d->value) ? impl::get_value_int(d->value) : def;
  5123. }
  5124. PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
  5125. {
  5126. xml_node_struct* d = _data();
  5127. return (d && d->value) ? impl::get_value_uint(d->value) : def;
  5128. }
  5129. PUGI__FN double xml_text::as_double(double def) const
  5130. {
  5131. xml_node_struct* d = _data();
  5132. return (d && d->value) ? impl::get_value_double(d->value) : def;
  5133. }
  5134. PUGI__FN float xml_text::as_float(float def) const
  5135. {
  5136. xml_node_struct* d = _data();
  5137. return (d && d->value) ? impl::get_value_float(d->value) : def;
  5138. }
  5139. PUGI__FN bool xml_text::as_bool(bool def) const
  5140. {
  5141. xml_node_struct* d = _data();
  5142. return (d && d->value) ? impl::get_value_bool(d->value) : def;
  5143. }
  5144. #ifdef PUGIXML_HAS_LONG_LONG
  5145. PUGI__FN long long xml_text::as_llong(long long def) const
  5146. {
  5147. xml_node_struct* d = _data();
  5148. return (d && d->value) ? impl::get_value_llong(d->value) : def;
  5149. }
  5150. PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
  5151. {
  5152. xml_node_struct* d = _data();
  5153. return (d && d->value) ? impl::get_value_ullong(d->value) : def;
  5154. }
  5155. #endif
  5156. PUGI__FN bool xml_text::set(const char_t* rhs)
  5157. {
  5158. xml_node_struct* dn = _data_new();
  5159. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
  5160. }
  5161. PUGI__FN bool xml_text::set(int rhs)
  5162. {
  5163. xml_node_struct* dn = _data_new();
  5164. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5165. }
  5166. PUGI__FN bool xml_text::set(unsigned int rhs)
  5167. {
  5168. xml_node_struct* dn = _data_new();
  5169. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5170. }
  5171. PUGI__FN bool xml_text::set(long rhs)
  5172. {
  5173. xml_node_struct* dn = _data_new();
  5174. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5175. }
  5176. PUGI__FN bool xml_text::set(unsigned long rhs)
  5177. {
  5178. xml_node_struct* dn = _data_new();
  5179. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5180. }
  5181. PUGI__FN bool xml_text::set(float rhs)
  5182. {
  5183. xml_node_struct* dn = _data_new();
  5184. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
  5185. }
  5186. PUGI__FN bool xml_text::set(float rhs, int precision)
  5187. {
  5188. xml_node_struct* dn = _data_new();
  5189. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5190. }
  5191. PUGI__FN bool xml_text::set(double rhs)
  5192. {
  5193. xml_node_struct* dn = _data_new();
  5194. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
  5195. }
  5196. PUGI__FN bool xml_text::set(double rhs, int precision)
  5197. {
  5198. xml_node_struct* dn = _data_new();
  5199. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5200. }
  5201. PUGI__FN bool xml_text::set(bool rhs)
  5202. {
  5203. xml_node_struct* dn = _data_new();
  5204. return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
  5205. }
  5206. #ifdef PUGIXML_HAS_LONG_LONG
  5207. PUGI__FN bool xml_text::set(long long rhs)
  5208. {
  5209. xml_node_struct* dn = _data_new();
  5210. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5211. }
  5212. PUGI__FN bool xml_text::set(unsigned long long rhs)
  5213. {
  5214. xml_node_struct* dn = _data_new();
  5215. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5216. }
  5217. #endif
  5218. PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
  5219. {
  5220. set(rhs);
  5221. return *this;
  5222. }
  5223. PUGI__FN xml_text& xml_text::operator=(int rhs)
  5224. {
  5225. set(rhs);
  5226. return *this;
  5227. }
  5228. PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
  5229. {
  5230. set(rhs);
  5231. return *this;
  5232. }
  5233. PUGI__FN xml_text& xml_text::operator=(long rhs)
  5234. {
  5235. set(rhs);
  5236. return *this;
  5237. }
  5238. PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
  5239. {
  5240. set(rhs);
  5241. return *this;
  5242. }
  5243. PUGI__FN xml_text& xml_text::operator=(double rhs)
  5244. {
  5245. set(rhs);
  5246. return *this;
  5247. }
  5248. PUGI__FN xml_text& xml_text::operator=(float rhs)
  5249. {
  5250. set(rhs);
  5251. return *this;
  5252. }
  5253. PUGI__FN xml_text& xml_text::operator=(bool rhs)
  5254. {
  5255. set(rhs);
  5256. return *this;
  5257. }
  5258. #ifdef PUGIXML_HAS_LONG_LONG
  5259. PUGI__FN xml_text& xml_text::operator=(long long rhs)
  5260. {
  5261. set(rhs);
  5262. return *this;
  5263. }
  5264. PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
  5265. {
  5266. set(rhs);
  5267. return *this;
  5268. }
  5269. #endif
  5270. PUGI__FN xml_node xml_text::data() const
  5271. {
  5272. return xml_node(_data());
  5273. }
  5274. #ifdef __BORLANDC__
  5275. PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
  5276. {
  5277. return (bool)lhs && rhs;
  5278. }
  5279. PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
  5280. {
  5281. return (bool)lhs || rhs;
  5282. }
  5283. #endif
  5284. PUGI__FN xml_node_iterator::xml_node_iterator()
  5285. {
  5286. }
  5287. PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
  5288. {
  5289. }
  5290. PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5291. {
  5292. }
  5293. PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
  5294. {
  5295. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5296. }
  5297. PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
  5298. {
  5299. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5300. }
  5301. PUGI__FN xml_node& xml_node_iterator::operator*() const
  5302. {
  5303. assert(_wrap._root);
  5304. return _wrap;
  5305. }
  5306. PUGI__FN xml_node* xml_node_iterator::operator->() const
  5307. {
  5308. assert(_wrap._root);
  5309. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5310. }
  5311. PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
  5312. {
  5313. assert(_wrap._root);
  5314. _wrap._root = _wrap._root->next_sibling;
  5315. return *this;
  5316. }
  5317. PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
  5318. {
  5319. xml_node_iterator temp = *this;
  5320. ++*this;
  5321. return temp;
  5322. }
  5323. PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
  5324. {
  5325. _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
  5326. return *this;
  5327. }
  5328. PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
  5329. {
  5330. xml_node_iterator temp = *this;
  5331. --*this;
  5332. return temp;
  5333. }
  5334. PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
  5335. {
  5336. }
  5337. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
  5338. {
  5339. }
  5340. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5341. {
  5342. }
  5343. PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
  5344. {
  5345. return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
  5346. }
  5347. PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
  5348. {
  5349. return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
  5350. }
  5351. PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
  5352. {
  5353. assert(_wrap._attr);
  5354. return _wrap;
  5355. }
  5356. PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
  5357. {
  5358. assert(_wrap._attr);
  5359. return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
  5360. }
  5361. PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
  5362. {
  5363. assert(_wrap._attr);
  5364. _wrap._attr = _wrap._attr->next_attribute;
  5365. return *this;
  5366. }
  5367. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
  5368. {
  5369. xml_attribute_iterator temp = *this;
  5370. ++*this;
  5371. return temp;
  5372. }
  5373. PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
  5374. {
  5375. _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
  5376. return *this;
  5377. }
  5378. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
  5379. {
  5380. xml_attribute_iterator temp = *this;
  5381. --*this;
  5382. return temp;
  5383. }
  5384. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
  5385. {
  5386. }
  5387. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
  5388. {
  5389. }
  5390. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
  5391. {
  5392. }
  5393. PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
  5394. {
  5395. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5396. }
  5397. PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
  5398. {
  5399. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5400. }
  5401. PUGI__FN xml_node& xml_named_node_iterator::operator*() const
  5402. {
  5403. assert(_wrap._root);
  5404. return _wrap;
  5405. }
  5406. PUGI__FN xml_node* xml_named_node_iterator::operator->() const
  5407. {
  5408. assert(_wrap._root);
  5409. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5410. }
  5411. PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
  5412. {
  5413. assert(_wrap._root);
  5414. _wrap = _wrap.next_sibling(_name);
  5415. return *this;
  5416. }
  5417. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
  5418. {
  5419. xml_named_node_iterator temp = *this;
  5420. ++*this;
  5421. return temp;
  5422. }
  5423. PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
  5424. {
  5425. if (_wrap._root)
  5426. _wrap = _wrap.previous_sibling(_name);
  5427. else
  5428. {
  5429. _wrap = _parent.last_child();
  5430. if (!impl::strequal(_wrap.name(), _name))
  5431. _wrap = _wrap.previous_sibling(_name);
  5432. }
  5433. return *this;
  5434. }
  5435. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
  5436. {
  5437. xml_named_node_iterator temp = *this;
  5438. --*this;
  5439. return temp;
  5440. }
  5441. PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
  5442. {
  5443. }
  5444. PUGI__FN xml_parse_result::operator bool() const
  5445. {
  5446. return status == status_ok;
  5447. }
  5448. PUGI__FN const char* xml_parse_result::description() const
  5449. {
  5450. switch (status)
  5451. {
  5452. case status_ok: return "No error";
  5453. case status_file_not_found: return "File was not found";
  5454. case status_io_error: return "Error reading from file/stream";
  5455. case status_out_of_memory: return "Could not allocate memory";
  5456. case status_internal_error: return "Internal error occurred";
  5457. case status_unrecognized_tag: return "Could not determine tag type";
  5458. case status_bad_pi: return "Error parsing document declaration/processing instruction";
  5459. case status_bad_comment: return "Error parsing comment";
  5460. case status_bad_cdata: return "Error parsing CDATA section";
  5461. case status_bad_doctype: return "Error parsing document type declaration";
  5462. case status_bad_pcdata: return "Error parsing PCDATA section";
  5463. case status_bad_start_element: return "Error parsing start element tag";
  5464. case status_bad_attribute: return "Error parsing element attribute";
  5465. case status_bad_end_element: return "Error parsing end element tag";
  5466. case status_end_element_mismatch: return "Start-end tags mismatch";
  5467. case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
  5468. case status_no_document_element: return "No document element found";
  5469. default: return "Unknown error";
  5470. }
  5471. }
  5472. PUGI__FN xml_document::xml_document(): _buffer(0)
  5473. {
  5474. _create();
  5475. }
  5476. PUGI__FN xml_document::~xml_document()
  5477. {
  5478. _destroy();
  5479. }
  5480. #ifdef PUGIXML_HAS_MOVE
  5481. PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
  5482. {
  5483. _create();
  5484. _move(rhs);
  5485. }
  5486. PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5487. {
  5488. if (this == &rhs) return *this;
  5489. _destroy();
  5490. _create();
  5491. _move(rhs);
  5492. return *this;
  5493. }
  5494. #endif
  5495. PUGI__FN void xml_document::reset()
  5496. {
  5497. _destroy();
  5498. _create();
  5499. }
  5500. PUGI__FN void xml_document::reset(const xml_document& proto)
  5501. {
  5502. reset();
  5503. impl::node_copy_tree(_root, proto._root);
  5504. }
  5505. PUGI__FN void xml_document::_create()
  5506. {
  5507. assert(!_root);
  5508. #ifdef PUGIXML_COMPACT
  5509. // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
  5510. const size_t page_offset = sizeof(void*);
  5511. #else
  5512. const size_t page_offset = 0;
  5513. #endif
  5514. // initialize sentinel page
  5515. PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
  5516. // prepare page structure
  5517. impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
  5518. assert(page);
  5519. page->busy_size = impl::xml_memory_page_size;
  5520. // setup first page marker
  5521. #ifdef PUGIXML_COMPACT
  5522. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  5523. page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
  5524. *page->compact_page_marker = sizeof(impl::xml_memory_page);
  5525. #endif
  5526. // allocate new root
  5527. _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
  5528. _root->prev_sibling_c = _root;
  5529. // setup sentinel page
  5530. page->allocator = static_cast<impl::xml_document_struct*>(_root);
  5531. // setup hash table pointer in allocator
  5532. #ifdef PUGIXML_COMPACT
  5533. page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
  5534. #endif
  5535. // verify the document allocation
  5536. assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
  5537. }
  5538. PUGI__FN void xml_document::_destroy()
  5539. {
  5540. assert(_root);
  5541. // destroy static storage
  5542. if (_buffer)
  5543. {
  5544. impl::xml_memory::deallocate(_buffer);
  5545. _buffer = 0;
  5546. }
  5547. // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
  5548. for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
  5549. {
  5550. if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
  5551. }
  5552. // destroy dynamic storage, leave sentinel page (it's in static memory)
  5553. impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
  5554. assert(root_page && !root_page->prev);
  5555. assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
  5556. for (impl::xml_memory_page* page = root_page->next; page; )
  5557. {
  5558. impl::xml_memory_page* next = page->next;
  5559. impl::xml_allocator::deallocate_page(page);
  5560. page = next;
  5561. }
  5562. #ifdef PUGIXML_COMPACT
  5563. // destroy hash table
  5564. static_cast<impl::xml_document_struct*>(_root)->hash.clear();
  5565. #endif
  5566. _root = 0;
  5567. }
  5568. #ifdef PUGIXML_HAS_MOVE
  5569. PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5570. {
  5571. impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
  5572. impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
  5573. // save first child pointer for later; this needs hash access
  5574. xml_node_struct* other_first_child = other->first_child;
  5575. #ifdef PUGIXML_COMPACT
  5576. // reserve space for the hash table up front; this is the only operation that can fail
  5577. // if it does, we have no choice but to throw (if we have exceptions)
  5578. if (other_first_child)
  5579. {
  5580. size_t other_children = 0;
  5581. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5582. other_children++;
  5583. // in compact mode, each pointer assignment could result in a hash table request
  5584. // during move, we have to relocate document first_child and parents of all children
  5585. // normally there's just one child and its parent has a pointerless encoding but
  5586. // we assume the worst here
  5587. if (!other->_hash->reserve(other_children + 1))
  5588. {
  5589. #ifdef PUGIXML_NO_EXCEPTIONS
  5590. return;
  5591. #else
  5592. throw std::bad_alloc();
  5593. #endif
  5594. }
  5595. }
  5596. #endif
  5597. // move allocation state
  5598. // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
  5599. if (other->_root != PUGI__GETPAGE(other))
  5600. {
  5601. doc->_root = other->_root;
  5602. doc->_busy_size = other->_busy_size;
  5603. }
  5604. // move buffer state
  5605. doc->buffer = other->buffer;
  5606. doc->extra_buffers = other->extra_buffers;
  5607. _buffer = rhs._buffer;
  5608. #ifdef PUGIXML_COMPACT
  5609. // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
  5610. doc->hash = other->hash;
  5611. doc->_hash = &doc->hash;
  5612. // make sure we don't access other hash up until the end when we reinitialize other document
  5613. other->_hash = 0;
  5614. #endif
  5615. // move page structure
  5616. impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
  5617. assert(doc_page && !doc_page->prev && !doc_page->next);
  5618. impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
  5619. assert(other_page && !other_page->prev);
  5620. // relink pages since root page is embedded into xml_document
  5621. if (impl::xml_memory_page* page = other_page->next)
  5622. {
  5623. assert(page->prev == other_page);
  5624. page->prev = doc_page;
  5625. doc_page->next = page;
  5626. other_page->next = 0;
  5627. }
  5628. // make sure pages point to the correct document state
  5629. for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
  5630. {
  5631. assert(page->allocator == other);
  5632. page->allocator = doc;
  5633. #ifdef PUGIXML_COMPACT
  5634. // this automatically migrates most children between documents and prevents ->parent assignment from allocating
  5635. if (page->compact_shared_parent == other)
  5636. page->compact_shared_parent = doc;
  5637. #endif
  5638. }
  5639. // move tree structure
  5640. assert(!doc->first_child);
  5641. doc->first_child = other_first_child;
  5642. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5643. {
  5644. #ifdef PUGIXML_COMPACT
  5645. // most children will have migrated when we reassigned compact_shared_parent
  5646. assert(node->parent == other || node->parent == doc);
  5647. node->parent = doc;
  5648. #else
  5649. assert(node->parent == other);
  5650. node->parent = doc;
  5651. #endif
  5652. }
  5653. // reset other document
  5654. new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
  5655. rhs._buffer = 0;
  5656. }
  5657. #endif
  5658. #ifndef PUGIXML_NO_STL
  5659. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
  5660. {
  5661. reset();
  5662. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
  5663. }
  5664. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
  5665. {
  5666. reset();
  5667. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
  5668. }
  5669. #endif
  5670. PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
  5671. {
  5672. // Force native encoding (skip autodetection)
  5673. #ifdef PUGIXML_WCHAR_MODE
  5674. xml_encoding encoding = encoding_wchar;
  5675. #else
  5676. xml_encoding encoding = encoding_utf8;
  5677. #endif
  5678. return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
  5679. }
  5680. PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
  5681. {
  5682. return load_string(contents, options);
  5683. }
  5684. PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
  5685. {
  5686. reset();
  5687. using impl::auto_deleter; // MSVC7 workaround
  5688. auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
  5689. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5690. }
  5691. PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
  5692. {
  5693. reset();
  5694. using impl::auto_deleter; // MSVC7 workaround
  5695. auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
  5696. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5697. }
  5698. PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5699. {
  5700. reset();
  5701. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
  5702. }
  5703. PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5704. {
  5705. reset();
  5706. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
  5707. }
  5708. PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5709. {
  5710. reset();
  5711. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
  5712. }
  5713. PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5714. {
  5715. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5716. if ((flags & format_write_bom) && encoding != encoding_latin1)
  5717. {
  5718. // BOM always represents the codepoint U+FEFF, so just write it in native encoding
  5719. #ifdef PUGIXML_WCHAR_MODE
  5720. unsigned int bom = 0xfeff;
  5721. buffered_writer.write(static_cast<wchar_t>(bom));
  5722. #else
  5723. buffered_writer.write('\xef', '\xbb', '\xbf');
  5724. #endif
  5725. }
  5726. if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
  5727. {
  5728. buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
  5729. if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
  5730. buffered_writer.write('?', '>');
  5731. if (!(flags & format_raw)) buffered_writer.write('\n');
  5732. }
  5733. impl::node_output(buffered_writer, _root, indent, flags, 0);
  5734. buffered_writer.flush();
  5735. }
  5736. #ifndef PUGIXML_NO_STL
  5737. PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5738. {
  5739. xml_writer_stream writer(stream);
  5740. save(writer, indent, flags, encoding);
  5741. }
  5742. PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
  5743. {
  5744. xml_writer_stream writer(stream);
  5745. save(writer, indent, flags, encoding_wchar);
  5746. }
  5747. #endif
  5748. PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5749. {
  5750. using impl::auto_deleter; // MSVC7 workaround
  5751. auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
  5752. return impl::save_file_impl(*this, file.data, indent, flags, encoding);
  5753. }
  5754. PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5755. {
  5756. using impl::auto_deleter; // MSVC7 workaround
  5757. auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
  5758. return impl::save_file_impl(*this, file.data, indent, flags, encoding);
  5759. }
  5760. PUGI__FN xml_node xml_document::document_element() const
  5761. {
  5762. assert(_root);
  5763. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5764. if (PUGI__NODETYPE(i) == node_element)
  5765. return xml_node(i);
  5766. return xml_node();
  5767. }
  5768. #ifndef PUGIXML_NO_STL
  5769. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
  5770. {
  5771. assert(str);
  5772. return impl::as_utf8_impl(str, impl::strlength_wide(str));
  5773. }
  5774. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
  5775. {
  5776. return impl::as_utf8_impl(str.c_str(), str.size());
  5777. }
  5778. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
  5779. {
  5780. assert(str);
  5781. return impl::as_wide_impl(str, strlen(str));
  5782. }
  5783. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
  5784. {
  5785. return impl::as_wide_impl(str.c_str(), str.size());
  5786. }
  5787. #endif
  5788. PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
  5789. {
  5790. impl::xml_memory::allocate = allocate;
  5791. impl::xml_memory::deallocate = deallocate;
  5792. }
  5793. PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
  5794. {
  5795. return impl::xml_memory::allocate;
  5796. }
  5797. PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
  5798. {
  5799. return impl::xml_memory::deallocate;
  5800. }
  5801. }
  5802. #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
  5803. namespace std
  5804. {
  5805. // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
  5806. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
  5807. {
  5808. return std::bidirectional_iterator_tag();
  5809. }
  5810. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
  5811. {
  5812. return std::bidirectional_iterator_tag();
  5813. }
  5814. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
  5815. {
  5816. return std::bidirectional_iterator_tag();
  5817. }
  5818. }
  5819. #endif
  5820. #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
  5821. namespace std
  5822. {
  5823. // Workarounds for (non-standard) iterator category detection
  5824. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
  5825. {
  5826. return std::bidirectional_iterator_tag();
  5827. }
  5828. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
  5829. {
  5830. return std::bidirectional_iterator_tag();
  5831. }
  5832. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
  5833. {
  5834. return std::bidirectional_iterator_tag();
  5835. }
  5836. }
  5837. #endif
  5838. #ifndef PUGIXML_NO_XPATH
  5839. // STL replacements
  5840. PUGI__NS_BEGIN
  // Function object that compares two values of the same type with operator==.
  struct equal_to
  {
      template <typename T>
      bool operator()(const T& lhs, const T& rhs) const
      {
          const bool result = (lhs == rhs);
          return result;
      }
  };
  // Function object that compares two values of the same type with operator!=.
  struct not_equal_to
  {
      template <typename T>
      bool operator()(const T& lhs, const T& rhs) const
      {
          const bool result = (lhs != rhs);
          return result;
      }
  };
  // Function object that compares two values of the same type with operator<.
  struct less
  {
      template <typename T>
      bool operator()(const T& lhs, const T& rhs) const
      {
          const bool result = (lhs < rhs);
          return result;
      }
  };
  // Function object that compares two values of the same type with operator<=.
  struct less_equal
  {
      template <typename T>
      bool operator()(const T& lhs, const T& rhs) const
      {
          const bool result = (lhs <= rhs);
          return result;
      }
  };
  // Exchanges two values through a temporary copy (copy construction plus two copy assignments).
  template <typename T>
  inline void swap(T& lhs, T& rhs)
  {
      T held = lhs;

      lhs = rhs;
      rhs = held;
  }
  5875. template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
  5876. {
  5877. I result = begin;
  5878. for (I it = begin + 1; it != end; ++it)
  5879. if (pred(*it, *result))
  5880. result = it;
  5881. return result;
  5882. }
  5883. template <typename I> PUGI__FN void reverse(I begin, I end)
  5884. {
  5885. while (end - begin > 1)
  5886. swap(*begin++, *--end);
  5887. }
  5888. template <typename I> PUGI__FN I unique(I begin, I end)
  5889. {
  5890. // fast skip head
  5891. while (end - begin > 1 && *begin != *(begin + 1))
  5892. begin++;
  5893. if (begin == end)
  5894. return begin;
  5895. // last written element
  5896. I write = begin++;
  5897. // merge unique elements
  5898. while (begin != end)
  5899. {
  5900. if (*begin != *write)
  5901. *++write = *begin++;
  5902. else
  5903. begin++;
  5904. }
  5905. // past-the-end (write points to live element)
  5906. return write + 1;
  5907. }
  5908. template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
  5909. {
  5910. if (begin == end)
  5911. return;
  5912. for (T* it = begin + 1; it != end; ++it)
  5913. {
  5914. T val = *it;
  5915. T* hole = it;
  5916. // move hole backwards
  5917. while (hole > begin && pred(val, *(hole - 1)))
  5918. {
  5919. *hole = *(hole - 1);
  5920. hole--;
  5921. }
  5922. // fill hole with element
  5923. *hole = val;
  5924. }
  5925. }
  // Returns the iterator (one of the three passed in) that refers to the median value under `pred`.
  // Only the local iterator copies are exchanged; the referenced elements are not modified.
  template <typename I, typename Pred>
  inline I median3(I first, I middle, I last, const Pred& pred)
  {
      // order first/middle
      if (pred(*middle, *first))
      {
          swap(middle, first);
      }

      // push the largest of the three into `last`
      if (pred(*last, *middle))
      {
          swap(last, middle);
      }

      // re-establish first <= middle after the previous exchange
      if (pred(*middle, *first))
      {
          swap(middle, first);
      }

      return middle;
  }
  5936. template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
  5937. {
  5938. // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
  5939. T* eq = begin;
  5940. T* lt = begin;
  5941. T* gt = end;
  5942. while (lt < gt)
  5943. {
  5944. if (pred(*lt, pivot))
  5945. lt++;
  5946. else if (*lt == pivot)
  5947. swap(*eq++, *lt++);
  5948. else
  5949. swap(*lt, *--gt);
  5950. }
  5951. // we now have just 4 groups: = < >; move equal elements to the middle
  5952. T* eqbeg = gt;
  5953. for (T* it = begin; it != eq; ++it)
  5954. swap(*it, *--eqbeg);
  5955. *out_eqbeg = eqbeg;
  5956. *out_eqend = gt;
  5957. }
  5958. template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
  5959. {
  5960. // sort large chunks
  5961. while (end - begin > 16)
  5962. {
  5963. // find median element
  5964. I middle = begin + (end - begin) / 2;
  5965. I median = median3(begin, middle, end - 1, pred);
  5966. // partition in three chunks (< = >)
  5967. I eqbeg, eqend;
  5968. partition3(begin, end, *median, pred, &eqbeg, &eqend);
  5969. // loop on larger half
  5970. if (eqbeg - begin > end - eqend)
  5971. {
  5972. sort(eqend, end, pred);
  5973. end = eqbeg;
  5974. }
  5975. else
  5976. {
  5977. sort(begin, eqbeg, pred);
  5978. begin = eqend;
  5979. }
  5980. }
  5981. // insertion sort small chunk
  5982. insertion_sort(begin, end, pred);
  5983. }
  // Inserts a non-null pointer key into an open-addressing pointer set.
  // Empty slots are represented by null entries; size - 1 is used as a bit mask
  // (NOTE(review): this presumably requires size to be a power of two - confirm with callers).
  // Returns true if the key was stored, false if it was already present.
  PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
  {
      assert(key);

      // mix the pointer bits with the MurmurHash3 32-bit finalizer
      unsigned int hash = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));

      hash ^= hash >> 16;
      hash *= 0x85ebca6bu;
      hash ^= hash >> 13;
      hash *= 0xc2b2ae35u;
      hash ^= hash >> 16;

      const size_t mask = size - 1;
      size_t slot = hash & mask;

      for (size_t attempt = 0; attempt <= mask; ++attempt)
      {
          const void* occupant = table[slot];

          if (occupant == 0)
          {
              table[slot] = key;
              return true;
          }

          if (occupant == key)
              return false;

          // collision: the step grows by one on every attempt (quadratic probing)
          slot = (slot + attempt + 1) & mask;
      }

      assert(false && "Hash table is full"); // unreachable
      return false;
  }
  6011. PUGI__NS_END
  6012. // Allocator used for AST and evaluation stacks
  6013. PUGI__NS_BEGIN
  // Size in bytes of the data area of one XPath allocator page;
  // can be overridden at build time via PUGIXML_MEMORY_XPATH_PAGE_SIZE.
  #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
  static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
  #else
  static const size_t xpath_memory_page_size = 4096;
  #endif
  // Allocation granularity of the XPath allocator: the larger of sizeof(double) and sizeof(void*).
  static const uintptr_t xpath_memory_block_alignment = (sizeof(void*) < sizeof(double)) ? sizeof(double) : sizeof(void*);
  6022. struct xpath_memory_block
  6023. {
  6024. xpath_memory_block* next;
  6025. size_t capacity;
  6026. union
  6027. {
  6028. char data[xpath_memory_page_size];
  6029. double alignment;
  6030. };
  6031. };
  6032. struct xpath_allocator
  6033. {
  6034. xpath_memory_block* _root;
  6035. size_t _root_size;
  6036. bool* _error;
  6037. xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
  6038. {
  6039. }
  6040. void* allocate(size_t size)
  6041. {
  6042. // round size up to block alignment boundary
  6043. size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6044. if (_root_size + size <= _root->capacity)
  6045. {
  6046. void* buf = &_root->data[0] + _root_size;
  6047. _root_size += size;
  6048. return buf;
  6049. }
  6050. else
  6051. {
  6052. // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
  6053. size_t block_capacity_base = sizeof(_root->data);
  6054. size_t block_capacity_req = size + block_capacity_base / 4;
  6055. size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
  6056. size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
  6057. xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
  6058. if (!block)
  6059. {
  6060. if (_error) *_error = true;
  6061. return 0;
  6062. }
  6063. block->next = _root;
  6064. block->capacity = block_capacity;
  6065. _root = block;
  6066. _root_size = size;
  6067. return block->data;
  6068. }
  6069. }
  6070. void* reallocate(void* ptr, size_t old_size, size_t new_size)
  6071. {
  6072. // round size up to block alignment boundary
  6073. old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6074. new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6075. // we can only reallocate the last object
  6076. assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
  6077. // try to reallocate the object inplace
  6078. if (ptr && _root_size - old_size + new_size <= _root->capacity)
  6079. {
  6080. _root_size = _root_size - old_size + new_size;
  6081. return ptr;
  6082. }
  6083. // allocate a new block
  6084. void* result = allocate(new_size);
  6085. if (!result) return 0;
  6086. // we have a new block
  6087. if (ptr)
  6088. {
  6089. // copy old data (we only support growing)
  6090. assert(new_size >= old_size);
  6091. memcpy(result, ptr, old_size);
  6092. // free the previous page if it had no other objects
  6093. assert(_root->data == result);
  6094. assert(_root->next);
  6095. if (_root->next->data == ptr)
  6096. {
  6097. // deallocate the whole page, unless it was the first one
  6098. xpath_memory_block* next = _root->next->next;
  6099. if (next)
  6100. {
  6101. xml_memory::deallocate(_root->next);
  6102. _root->next = next;
  6103. }
  6104. }
  6105. }
  6106. return result;
  6107. }
  6108. void revert(const xpath_allocator& state)
  6109. {
  6110. // free all new pages
  6111. xpath_memory_block* cur = _root;
  6112. while (cur != state._root)
  6113. {
  6114. xpath_memory_block* next = cur->next;
  6115. xml_memory::deallocate(cur);
  6116. cur = next;
  6117. }
  6118. // restore state
  6119. _root = state._root;
  6120. _root_size = state._root_size;
  6121. }
  6122. void release()
  6123. {
  6124. xpath_memory_block* cur = _root;
  6125. assert(cur);
  6126. while (cur->next)
  6127. {
  6128. xpath_memory_block* next = cur->next;
  6129. xml_memory::deallocate(cur);
  6130. cur = next;
  6131. }
  6132. }
  6133. };
  6134. struct xpath_allocator_capture
  6135. {
  6136. xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
  6137. {
  6138. }
  6139. ~xpath_allocator_capture()
  6140. {
  6141. _target->revert(_state);
  6142. }
  6143. xpath_allocator* _target;
  6144. xpath_allocator _state;
  6145. };
  6146. struct xpath_stack
  6147. {
  6148. xpath_allocator* result;
  6149. xpath_allocator* temp;
  6150. };
  6151. struct xpath_stack_data
  6152. {
  6153. xpath_memory_block blocks[2];
  6154. xpath_allocator result;
  6155. xpath_allocator temp;
  6156. xpath_stack stack;
  6157. bool oom;
  6158. xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
  6159. {
  6160. blocks[0].next = blocks[1].next = 0;
  6161. blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
  6162. stack.result = &result;
  6163. stack.temp = &temp;
  6164. }
  6165. ~xpath_stack_data()
  6166. {
  6167. result.release();
  6168. temp.release();
  6169. }
  6170. };
  6171. PUGI__NS_END
  6172. // String class
  6173. PUGI__NS_BEGIN
  6174. class xpath_string
  6175. {
  6176. const char_t* _buffer;
  6177. bool _uses_heap;
  6178. size_t _length_heap;
  6179. static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
  6180. {
  6181. char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
  6182. if (!result) return 0;
  6183. memcpy(result, string, length * sizeof(char_t));
  6184. result[length] = 0;
  6185. return result;
  6186. }
  6187. xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
  6188. {
  6189. }
  6190. public:
  6191. static xpath_string from_const(const char_t* str)
  6192. {
  6193. return xpath_string(str, false, 0);
  6194. }
  6195. static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
  6196. {
  6197. assert(begin <= end && *end == 0);
  6198. return xpath_string(begin, true, static_cast<size_t>(end - begin));
  6199. }
  6200. static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
  6201. {
  6202. assert(begin <= end);
  6203. if (begin == end)
  6204. return xpath_string();
  6205. size_t length = static_cast<size_t>(end - begin);
  6206. const char_t* data = duplicate_string(begin, length, alloc);
  6207. return data ? xpath_string(data, true, length) : xpath_string();
  6208. }
  6209. xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
  6210. {
  6211. }
  6212. void append(const xpath_string& o, xpath_allocator* alloc)
  6213. {
  6214. // skip empty sources
  6215. if (!*o._buffer) return;
  6216. // fast append for constant empty target and constant source
  6217. if (!*_buffer && !_uses_heap && !o._uses_heap)
  6218. {
  6219. _buffer = o._buffer;
  6220. }
  6221. else
  6222. {
  6223. // need to make heap copy
  6224. size_t target_length = length();
  6225. size_t source_length = o.length();
  6226. size_t result_length = target_length + source_length;
  6227. // allocate new buffer
  6228. char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
  6229. if (!result) return;
  6230. // append first string to the new buffer in case there was no reallocation
  6231. if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
  6232. // append second string to the new buffer
  6233. memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
  6234. result[result_length] = 0;
  6235. // finalize
  6236. _buffer = result;
  6237. _uses_heap = true;
  6238. _length_heap = result_length;
  6239. }
  6240. }
  6241. const char_t* c_str() const
  6242. {
  6243. return _buffer;
  6244. }
  6245. size_t length() const
  6246. {
  6247. return _uses_heap ? _length_heap : strlength(_buffer);
  6248. }
  6249. char_t* data(xpath_allocator* alloc)
  6250. {
  6251. // make private heap copy
  6252. if (!_uses_heap)
  6253. {
  6254. size_t length_ = strlength(_buffer);
  6255. const char_t* data_ = duplicate_string(_buffer, length_, alloc);
  6256. if (!data_) return 0;
  6257. _buffer = data_;
  6258. _uses_heap = true;
  6259. _length_heap = length_;
  6260. }
  6261. return const_cast<char_t*>(_buffer);
  6262. }
  6263. bool empty() const
  6264. {
  6265. return *_buffer == 0;
  6266. }
  6267. bool operator==(const xpath_string& o) const
  6268. {
  6269. return strequal(_buffer, o._buffer);
  6270. }
  6271. bool operator!=(const xpath_string& o) const
  6272. {
  6273. return !strequal(_buffer, o._buffer);
  6274. }
  6275. bool uses_heap() const
  6276. {
  6277. return _uses_heap;
  6278. }
  6279. };
  6280. PUGI__NS_END
  6281. PUGI__NS_BEGIN
  6282. PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
  6283. {
  6284. while (*pattern && *string == *pattern)
  6285. {
  6286. string++;
  6287. pattern++;
  6288. }
  6289. return *pattern == 0;
  6290. }
  6291. PUGI__FN const char_t* find_char(const char_t* s, char_t c)
  6292. {
  6293. #ifdef PUGIXML_WCHAR_MODE
  6294. return wcschr(s, c);
  6295. #else
  6296. return strchr(s, c);
  6297. #endif
  6298. }
  6299. PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
  6300. {
  6301. #ifdef PUGIXML_WCHAR_MODE
  6302. // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
  6303. return (*p == 0) ? s : wcsstr(s, p);
  6304. #else
  6305. return strstr(s, p);
  6306. #endif
  6307. }
  6308. // Converts symbol to lower case, if it is an ASCII one
  6309. PUGI__FN char_t tolower_ascii(char_t ch)
  6310. {
  6311. return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
  6312. }
  6313. PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
  6314. {
  6315. if (na.attribute())
  6316. return xpath_string::from_const(na.attribute().value());
  6317. else
  6318. {
  6319. xml_node n = na.node();
  6320. switch (n.type())
  6321. {
  6322. case node_pcdata:
  6323. case node_cdata:
  6324. case node_comment:
  6325. case node_pi:
  6326. return xpath_string::from_const(n.value());
  6327. case node_document:
  6328. case node_element:
  6329. {
  6330. xpath_string result;
  6331. // element nodes can have value if parse_embed_pcdata was used
  6332. if (n.value()[0])
  6333. result.append(xpath_string::from_const(n.value()), alloc);
  6334. xml_node cur = n.first_child();
  6335. while (cur && cur != n)
  6336. {
  6337. if (cur.type() == node_pcdata || cur.type() == node_cdata)
  6338. result.append(xpath_string::from_const(cur.value()), alloc);
  6339. if (cur.first_child())
  6340. cur = cur.first_child();
  6341. else if (cur.next_sibling())
  6342. cur = cur.next_sibling();
  6343. else
  6344. {
  6345. while (!cur.next_sibling() && cur != n)
  6346. cur = cur.parent();
  6347. if (cur != n) cur = cur.next_sibling();
  6348. }
  6349. }
  6350. return result;
  6351. }
  6352. default:
  6353. return xpath_string();
  6354. }
  6355. }
  6356. }
  6357. PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
  6358. {
  6359. assert(ln->parent == rn->parent);
  6360. // there is no common ancestor (the shared parent is null), nodes are from different documents
  6361. if (!ln->parent) return ln < rn;
  6362. // determine sibling order
  6363. xml_node_struct* ls = ln;
  6364. xml_node_struct* rs = rn;
  6365. while (ls && rs)
  6366. {
  6367. if (ls == rn) return true;
  6368. if (rs == ln) return false;
  6369. ls = ls->next_sibling;
  6370. rs = rs->next_sibling;
  6371. }
  6372. // if rn sibling chain ended ln must be before rn
  6373. return !rs;
  6374. }
  6375. PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
  6376. {
  6377. // find common ancestor at the same depth, if any
  6378. xml_node_struct* lp = ln;
  6379. xml_node_struct* rp = rn;
  6380. while (lp && rp && lp->parent != rp->parent)
  6381. {
  6382. lp = lp->parent;
  6383. rp = rp->parent;
  6384. }
  6385. // parents are the same!
  6386. if (lp && rp) return node_is_before_sibling(lp, rp);
  6387. // nodes are at different depths, need to normalize heights
  6388. bool left_higher = !lp;
  6389. while (lp)
  6390. {
  6391. lp = lp->parent;
  6392. ln = ln->parent;
  6393. }
  6394. while (rp)
  6395. {
  6396. rp = rp->parent;
  6397. rn = rn->parent;
  6398. }
  6399. // one node is the ancestor of the other
  6400. if (ln == rn) return left_higher;
  6401. // find common ancestor... again
  6402. while (ln->parent != rn->parent)
  6403. {
  6404. ln = ln->parent;
  6405. rn = rn->parent;
  6406. }
  6407. return node_is_before_sibling(ln, rn);
  6408. }
  6409. PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
  6410. {
  6411. while (node && node != parent) node = node->parent;
  6412. return parent && node == parent;
  6413. }
  6414. PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
  6415. {
  6416. xml_node_struct* node = xnode.node().internal_object();
  6417. if (node)
  6418. {
  6419. if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
  6420. {
  6421. if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
  6422. if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
  6423. }
  6424. return 0;
  6425. }
  6426. xml_attribute_struct* attr = xnode.attribute().internal_object();
  6427. if (attr)
  6428. {
  6429. if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
  6430. {
  6431. if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
  6432. if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
  6433. }
  6434. return 0;
  6435. }
  6436. return 0;
  6437. }
  6438. struct document_order_comparator
  6439. {
  6440. bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
  6441. {
  6442. // optimized document order based check
  6443. const void* lo = document_buffer_order(lhs);
  6444. const void* ro = document_buffer_order(rhs);
  6445. if (lo && ro) return lo < ro;
  6446. // slow comparison
  6447. xml_node ln = lhs.node(), rn = rhs.node();
  6448. // compare attributes
  6449. if (lhs.attribute() && rhs.attribute())
  6450. {
  6451. // shared parent
  6452. if (lhs.parent() == rhs.parent())
  6453. {
  6454. // determine sibling order
  6455. for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
  6456. if (a == rhs.attribute())
  6457. return true;
  6458. return false;
  6459. }
  6460. // compare attribute parents
  6461. ln = lhs.parent();
  6462. rn = rhs.parent();
  6463. }
  6464. else if (lhs.attribute())
  6465. {
  6466. // attributes go after the parent element
  6467. if (lhs.parent() == rhs.node()) return false;
  6468. ln = lhs.parent();
  6469. }
  6470. else if (rhs.attribute())
  6471. {
  6472. // attributes go after the parent element
  6473. if (rhs.parent() == lhs.node()) return true;
  6474. rn = rhs.parent();
  6475. }
  6476. if (ln == rn) return false;
  6477. if (!ln || !rn) return ln < rn;
  6478. return node_is_before(ln.internal_object(), rn.internal_object());
  6479. }
  6480. };
  6481. PUGI__FN double gen_nan()
  6482. {
  6483. #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
  6484. PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
  6485. typedef uint32_t UI; // BCC5 workaround
  6486. union { float f; UI i; } u;
  6487. u.i = 0x7fc00000;
  6488. return double(u.f);
  6489. #else
  6490. // fallback
  6491. const volatile double zero = 0.0;
  6492. return zero / zero;
  6493. #endif
  6494. }
  6495. PUGI__FN bool is_nan(double value)
  6496. {
  6497. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6498. return !!_isnan(value);
  6499. #elif defined(fpclassify) && defined(FP_NAN)
  6500. return fpclassify(value) == FP_NAN;
  6501. #else
  6502. // fallback
  6503. const volatile double v = value;
  6504. return v != v;
  6505. #endif
  6506. }
  6507. PUGI__FN const char_t* convert_number_to_string_special(double value)
  6508. {
  6509. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6510. if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
  6511. if (_isnan(value)) return PUGIXML_TEXT("NaN");
  6512. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6513. #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
  6514. switch (fpclassify(value))
  6515. {
  6516. case FP_NAN:
  6517. return PUGIXML_TEXT("NaN");
  6518. case FP_INFINITE:
  6519. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6520. case FP_ZERO:
  6521. return PUGIXML_TEXT("0");
  6522. default:
  6523. return 0;
  6524. }
  6525. #else
  6526. // fallback
  6527. const volatile double v = value;
  6528. if (v == 0) return PUGIXML_TEXT("0");
  6529. if (v != v) return PUGIXML_TEXT("NaN");
  6530. if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6531. return 0;
  6532. #endif
  6533. }
  6534. PUGI__FN bool convert_number_to_boolean(double value)
  6535. {
  6536. return (value != 0 && !is_nan(value));
  6537. }
  // Strips trailing '0' characters from [begin, end) and writes the terminator at the new end.
  // Note that *end is written even if nothing is stripped.
  PUGI__FN void truncate_zeros(char* begin, char* end)
  {
      while (end != begin && *(end - 1) == '0')
          --end;

      *end = 0;
  }
  6543. // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
  6544. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  6545. PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  6546. {
  6547. // get base values
  6548. int sign, exponent;
  6549. _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
  6550. // truncate redundant zeros
  6551. truncate_zeros(buffer, buffer + strlen(buffer));
  6552. // fill results
  6553. *out_mantissa = buffer;
  6554. *out_exponent = exponent;
  6555. }
  6556. #else
  6557. PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  6558. {
  6559. // get a scientific notation value with IEEE DBL_DIG decimals
  6560. PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
  6561. // get the exponent (possibly negative)
  6562. char* exponent_string = strchr(buffer, 'e');
  6563. assert(exponent_string);
  6564. int exponent = atoi(exponent_string + 1);
  6565. // extract mantissa string: skip sign
  6566. char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
  6567. assert(mantissa[0] != '0' && mantissa[1] == '.');
  6568. // divide mantissa by 10 to eliminate integer part
  6569. mantissa[1] = mantissa[0];
  6570. mantissa++;
  6571. exponent++;
  6572. // remove extra mantissa digits and zero-terminate mantissa
  6573. truncate_zeros(mantissa, exponent_string);
  6574. // fill results
  6575. *out_mantissa = mantissa;
  6576. *out_exponent = exponent;
  6577. }
  6578. #endif
  6579. PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
  6580. {
  6581. // try special number conversion
  6582. const char_t* special = convert_number_to_string_special(value);
  6583. if (special) return xpath_string::from_const(special);
  6584. // get mantissa + exponent form
  6585. char mantissa_buffer[32];
  6586. char* mantissa;
  6587. int exponent;
  6588. convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
  6589. // allocate a buffer of suitable length for the number
  6590. size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
  6591. char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
  6592. if (!result) return xpath_string();
  6593. // make the number!
  6594. char_t* s = result;
  6595. // sign
  6596. if (value < 0) *s++ = '-';
  6597. // integer part
  6598. if (exponent <= 0)
  6599. {
  6600. *s++ = '0';
  6601. }
  6602. else
  6603. {
  6604. while (exponent > 0)
  6605. {
  6606. assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
  6607. *s++ = *mantissa ? *mantissa++ : '0';
  6608. exponent--;
  6609. }
  6610. }
  6611. // fractional part
  6612. if (*mantissa)
  6613. {
  6614. // decimal point
  6615. *s++ = '.';
  6616. // extra zeroes from negative exponent
  6617. while (exponent < 0)
  6618. {
  6619. *s++ = '0';
  6620. exponent++;
  6621. }
  6622. // extra mantissa digits
  6623. while (*mantissa)
  6624. {
  6625. assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
  6626. *s++ = *mantissa++;
  6627. }
  6628. }
  6629. // zero-terminate
  6630. assert(s < result + result_size);
  6631. *s = 0;
  6632. return xpath_string::from_heap_preallocated(result, s);
  6633. }
  6634. PUGI__FN bool check_string_to_number_format(const char_t* string)
  6635. {
  6636. // parse leading whitespace
  6637. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6638. // parse sign
  6639. if (*string == '-') ++string;
  6640. if (!*string) return false;
  6641. // if there is no integer part, there should be a decimal part with at least one digit
  6642. if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
  6643. // parse integer part
  6644. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6645. // parse decimal part
  6646. if (*string == '.')
  6647. {
  6648. ++string;
  6649. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6650. }
  6651. // parse trailing whitespace
  6652. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6653. return *string == 0;
  6654. }
  6655. PUGI__FN double convert_string_to_number(const char_t* string)
  6656. {
  6657. // check string format
  6658. if (!check_string_to_number_format(string)) return gen_nan();
  6659. // parse string
  6660. #ifdef PUGIXML_WCHAR_MODE
  6661. return wcstod(string, 0);
  6662. #else
  6663. return strtod(string, 0);
  6664. #endif
  6665. }
  6666. PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
  6667. {
  6668. size_t length = static_cast<size_t>(end - begin);
  6669. char_t* scratch = buffer;
  6670. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6671. {
  6672. // need to make dummy on-heap copy
  6673. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6674. if (!scratch) return false;
  6675. }
  6676. // copy string to zero-terminated buffer and perform conversion
  6677. memcpy(scratch, begin, length * sizeof(char_t));
  6678. scratch[length] = 0;
  6679. *out_result = convert_string_to_number(scratch);
  6680. // free dummy buffer
  6681. if (scratch != buffer) xml_memory::deallocate(scratch);
  6682. return true;
  6683. }
  // Rounds value to the nearest integer; ties go towards positive infinity.
  // NaN and infinities are returned unchanged. The result is never -0
  // (a zero result is always +0).
  PUGI__FN double round_nearest(double value)
  {
      // floor(value + 0.5) is deliberately avoided: the addition itself may round, which
      // turns 0.49999999999999994 into 1 and shifts odd integers >= 2^52 to the next even
      // one. Comparing the distance to the lower integer has no such problem.
      const double lower = floor(value);
      const double rounded = (value - lower >= 0.5) ? lower + 1 : lower;

      // normalize a zero result to +0; NaN compares unequal to 0 and passes through as is
      return (rounded == 0) ? 0.0 : rounded;
  }
  // Rounds value to the nearest integer with ties going towards positive infinity,
  // returning -0 for inputs in [-0.5, -0] and +0 for +0.
  // NaN and infinities are returned unchanged.
  PUGI__FN double round_nearest_nzero(double value)
  {
      // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
      if (value >= -0.5 && value <= 0)
          return ceil(value);

      // floor(value + 0.5) is deliberately avoided: the addition itself may round, which
      // turns 0.49999999999999994 into 1 and shifts odd integers >= 2^52 to the next even
      // one. Comparing the distance to the lower integer has no such problem.
      const double lower = floor(value);

      return (value - lower >= 0.5) ? lower + 1 : lower;
  }
  6694. PUGI__FN const char_t* qualified_name(const xpath_node& node)
  6695. {
  6696. return node.attribute() ? node.attribute().name() : node.node().name();
  6697. }
  6698. PUGI__FN const char_t* local_name(const xpath_node& node)
  6699. {
  6700. const char_t* name = qualified_name(node);
  6701. const char_t* p = find_char(name, ':');
  6702. return p ? p + 1 : name;
  6703. }
  6704. struct namespace_uri_predicate
  6705. {
  6706. const char_t* prefix;
  6707. size_t prefix_length;
  6708. namespace_uri_predicate(const char_t* name)
  6709. {
  6710. const char_t* pos = find_char(name, ':');
  6711. prefix = pos ? name : 0;
  6712. prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
  6713. }
  6714. bool operator()(xml_attribute a) const
  6715. {
  6716. const char_t* name = a.name();
  6717. if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
  6718. return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
  6719. }
  6720. };
  6721. PUGI__FN const char_t* namespace_uri(xml_node node)
  6722. {
  6723. namespace_uri_predicate pred = node.name();
  6724. xml_node p = node;
  6725. while (p)
  6726. {
  6727. xml_attribute a = p.find_attribute(pred);
  6728. if (a) return a.value();
  6729. p = p.parent();
  6730. }
  6731. return PUGIXML_TEXT("");
  6732. }
  6733. PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
  6734. {
  6735. namespace_uri_predicate pred = attr.name();
  6736. // Default namespace does not apply to attributes
  6737. if (!pred.prefix) return PUGIXML_TEXT("");
  6738. xml_node p = parent;
  6739. while (p)
  6740. {
  6741. xml_attribute a = p.find_attribute(pred);
  6742. if (a) return a.value();
  6743. p = p.parent();
  6744. }
  6745. return PUGIXML_TEXT("");
  6746. }
  6747. PUGI__FN const char_t* namespace_uri(const xpath_node& node)
  6748. {
  6749. return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
  6750. }
  6751. PUGI__FN char_t* normalize_space(char_t* buffer)
  6752. {
  6753. char_t* write = buffer;
  6754. for (char_t* it = buffer; *it; )
  6755. {
  6756. char_t ch = *it++;
  6757. if (PUGI__IS_CHARTYPE(ch, ct_space))
  6758. {
  6759. // replace whitespace sequence with single space
  6760. while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
  6761. // avoid leading spaces
  6762. if (write != buffer) *write++ = ' ';
  6763. }
  6764. else *write++ = ch;
  6765. }
  6766. // remove trailing space
  6767. if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
  6768. // zero-terminate
  6769. *write = 0;
  6770. return write;
  6771. }
  6772. PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
  6773. {
  6774. char_t* write = buffer;
  6775. while (*buffer)
  6776. {
  6777. PUGI__DMC_VOLATILE char_t ch = *buffer++;
  6778. const char_t* pos = find_char(from, ch);
  6779. if (!pos)
  6780. *write++ = ch; // do not process
  6781. else if (static_cast<size_t>(pos - from) < to_length)
  6782. *write++ = to[pos - from]; // replace
  6783. }
  6784. // zero-terminate
  6785. *write = 0;
  6786. return write;
  6787. }
  6788. PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
  6789. {
  6790. unsigned char table[128] = {0};
  6791. while (*from)
  6792. {
  6793. unsigned int fc = static_cast<unsigned int>(*from);
  6794. unsigned int tc = static_cast<unsigned int>(*to);
  6795. if (fc >= 128 || tc >= 128)
  6796. return 0;
  6797. // code=128 means "skip character"
  6798. if (!table[fc])
  6799. table[fc] = static_cast<unsigned char>(tc ? tc : 128);
  6800. from++;
  6801. if (tc) to++;
  6802. }
  6803. for (int i = 0; i < 128; ++i)
  6804. if (!table[i])
  6805. table[i] = static_cast<unsigned char>(i);
  6806. void* result = alloc->allocate(sizeof(table));
  6807. if (!result) return 0;
  6808. memcpy(result, table, sizeof(table));
  6809. return static_cast<unsigned char*>(result);
  6810. }
  6811. PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
  6812. {
  6813. char_t* write = buffer;
  6814. while (*buffer)
  6815. {
  6816. char_t ch = *buffer++;
  6817. unsigned int index = static_cast<unsigned int>(ch);
  6818. if (index < 128)
  6819. {
  6820. unsigned char code = table[index];
  6821. // code=128 means "skip character" (table size is 128 so 128 can be a special value)
  6822. // this code skips these characters without extra branches
  6823. *write = static_cast<char_t>(code);
  6824. write += 1 - (code >> 7);
  6825. }
  6826. else
  6827. {
  6828. *write++ = ch;
  6829. }
  6830. }
  6831. // zero-terminate
  6832. *write = 0;
  6833. return write;
  6834. }
  6835. inline bool is_xpath_attribute(const char_t* name)
  6836. {
  6837. return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
  6838. }
  6839. struct xpath_variable_boolean: xpath_variable
  6840. {
  6841. xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
  6842. {
  6843. }
  6844. bool value;
  6845. char_t name[1];
  6846. };
  6847. struct xpath_variable_number: xpath_variable
  6848. {
  6849. xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
  6850. {
  6851. }
  6852. double value;
  6853. char_t name[1];
  6854. };
  6855. struct xpath_variable_string: xpath_variable
  6856. {
  6857. xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
  6858. {
  6859. }
  6860. ~xpath_variable_string()
  6861. {
  6862. if (value) xml_memory::deallocate(value);
  6863. }
  6864. char_t* value;
  6865. char_t name[1];
  6866. };
  6867. struct xpath_variable_node_set: xpath_variable
  6868. {
  6869. xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
  6870. {
  6871. }
  6872. xpath_node_set value;
  6873. char_t name[1];
  6874. };
  6875. static const xpath_node_set dummy_node_set;
  6876. PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
  6877. {
  6878. // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
  6879. unsigned int result = 0;
  6880. while (*str)
  6881. {
  6882. result += static_cast<unsigned int>(*str++);
  6883. result += result << 10;
  6884. result ^= result >> 6;
  6885. }
  6886. result += result << 3;
  6887. result ^= result >> 11;
  6888. result += result << 15;
  6889. return result;
  6890. }
  6891. template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
  6892. {
  6893. size_t length = strlength(name);
  6894. if (length == 0) return 0; // empty variable names are invalid
  6895. // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
  6896. void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
  6897. if (!memory) return 0;
  6898. T* result = new (memory) T();
  6899. memcpy(result->name, name, (length + 1) * sizeof(char_t));
  6900. return result;
  6901. }
  6902. PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
  6903. {
  6904. switch (type)
  6905. {
  6906. case xpath_type_node_set:
  6907. return new_xpath_variable<xpath_variable_node_set>(name);
  6908. case xpath_type_number:
  6909. return new_xpath_variable<xpath_variable_number>(name);
  6910. case xpath_type_string:
  6911. return new_xpath_variable<xpath_variable_string>(name);
  6912. case xpath_type_boolean:
  6913. return new_xpath_variable<xpath_variable_boolean>(name);
  6914. default:
  6915. return 0;
  6916. }
  6917. }
  6918. template <typename T> PUGI__FN void delete_xpath_variable(T* var)
  6919. {
  6920. var->~T();
  6921. xml_memory::deallocate(var);
  6922. }
  6923. PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
  6924. {
  6925. switch (type)
  6926. {
  6927. case xpath_type_node_set:
  6928. delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
  6929. break;
  6930. case xpath_type_number:
  6931. delete_xpath_variable(static_cast<xpath_variable_number*>(var));
  6932. break;
  6933. case xpath_type_string:
  6934. delete_xpath_variable(static_cast<xpath_variable_string*>(var));
  6935. break;
  6936. case xpath_type_boolean:
  6937. delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
  6938. break;
  6939. default:
  6940. assert(false && "Invalid variable type"); // unreachable
  6941. }
  6942. }
  6943. PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
  6944. {
  6945. switch (rhs->type())
  6946. {
  6947. case xpath_type_node_set:
  6948. return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
  6949. case xpath_type_number:
  6950. return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
  6951. case xpath_type_string:
  6952. return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
  6953. case xpath_type_boolean:
  6954. return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
  6955. default:
  6956. assert(false && "Invalid variable type"); // unreachable
  6957. return false;
  6958. }
  6959. }
  6960. PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
  6961. {
  6962. size_t length = static_cast<size_t>(end - begin);
  6963. char_t* scratch = buffer;
  6964. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6965. {
  6966. // need to make dummy on-heap copy
  6967. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6968. if (!scratch) return false;
  6969. }
  6970. // copy string to zero-terminated buffer and perform lookup
  6971. memcpy(scratch, begin, length * sizeof(char_t));
  6972. scratch[length] = 0;
  6973. *out_result = set->get(scratch);
  6974. // free dummy buffer
  6975. if (scratch != buffer) xml_memory::deallocate(scratch);
  6976. return true;
  6977. }
  6978. PUGI__NS_END
  6979. // Internal node set class
  6980. PUGI__NS_BEGIN
  6981. PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
  6982. {
  6983. if (end - begin < 2)
  6984. return xpath_node_set::type_sorted;
  6985. document_order_comparator cmp;
  6986. bool first = cmp(begin[0], begin[1]);
  6987. for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
  6988. if (cmp(it[0], it[1]) != first)
  6989. return xpath_node_set::type_unsorted;
  6990. return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
  6991. }
  6992. PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
  6993. {
  6994. xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  6995. if (type == xpath_node_set::type_unsorted)
  6996. {
  6997. xpath_node_set::type_t sorted = xpath_get_order(begin, end);
  6998. if (sorted == xpath_node_set::type_unsorted)
  6999. {
  7000. sort(begin, end, document_order_comparator());
  7001. type = xpath_node_set::type_sorted;
  7002. }
  7003. else
  7004. type = sorted;
  7005. }
  7006. if (type != order) reverse(begin, end);
  7007. return order;
  7008. }
  7009. PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
  7010. {
  7011. if (begin == end) return xpath_node();
  7012. switch (type)
  7013. {
  7014. case xpath_node_set::type_sorted:
  7015. return *begin;
  7016. case xpath_node_set::type_sorted_reverse:
  7017. return *(end - 1);
  7018. case xpath_node_set::type_unsorted:
  7019. return *min_element(begin, end, document_order_comparator());
  7020. default:
  7021. assert(false && "Invalid node set type"); // unreachable
  7022. return xpath_node();
  7023. }
  7024. }
  7025. class xpath_node_set_raw
  7026. {
  7027. xpath_node_set::type_t _type;
  7028. xpath_node* _begin;
  7029. xpath_node* _end;
  7030. xpath_node* _eos;
  7031. public:
  7032. xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
  7033. {
  7034. }
  7035. xpath_node* begin() const
  7036. {
  7037. return _begin;
  7038. }
  7039. xpath_node* end() const
  7040. {
  7041. return _end;
  7042. }
  7043. bool empty() const
  7044. {
  7045. return _begin == _end;
  7046. }
  7047. size_t size() const
  7048. {
  7049. return static_cast<size_t>(_end - _begin);
  7050. }
  7051. xpath_node first() const
  7052. {
  7053. return xpath_first(_begin, _end, _type);
  7054. }
  7055. void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
  7056. void push_back(const xpath_node& node, xpath_allocator* alloc)
  7057. {
  7058. if (_end != _eos)
  7059. *_end++ = node;
  7060. else
  7061. push_back_grow(node, alloc);
  7062. }
  7063. void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
  7064. {
  7065. if (begin_ == end_) return;
  7066. size_t size_ = static_cast<size_t>(_end - _begin);
  7067. size_t capacity = static_cast<size_t>(_eos - _begin);
  7068. size_t count = static_cast<size_t>(end_ - begin_);
  7069. if (size_ + count > capacity)
  7070. {
  7071. // reallocate the old array or allocate a new one
  7072. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
  7073. if (!data) return;
  7074. // finalize
  7075. _begin = data;
  7076. _end = data + size_;
  7077. _eos = data + size_ + count;
  7078. }
  7079. memcpy(_end, begin_, count * sizeof(xpath_node));
  7080. _end += count;
  7081. }
  7082. void sort_do()
  7083. {
  7084. _type = xpath_sort(_begin, _end, _type, false);
  7085. }
  7086. void truncate(xpath_node* pos)
  7087. {
  7088. assert(_begin <= pos && pos <= _end);
  7089. _end = pos;
  7090. }
  7091. void remove_duplicates(xpath_allocator* alloc)
  7092. {
  7093. if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
  7094. {
  7095. xpath_allocator_capture cr(alloc);
  7096. size_t size_ = static_cast<size_t>(_end - _begin);
  7097. size_t hash_size = 1;
  7098. while (hash_size < size_ + size_ / 2) hash_size *= 2;
  7099. const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
  7100. if (!hash_data) return;
  7101. memset(hash_data, 0, hash_size * sizeof(const void**));
  7102. xpath_node* write = _begin;
  7103. for (xpath_node* it = _begin; it != _end; ++it)
  7104. {
  7105. const void* attr = it->attribute().internal_object();
  7106. const void* node = it->node().internal_object();
  7107. const void* key = attr ? attr : node;
  7108. if (key && hash_insert(hash_data, hash_size, key))
  7109. {
  7110. *write++ = *it;
  7111. }
  7112. }
  7113. _end = write;
  7114. }
  7115. else
  7116. {
  7117. _end = unique(_begin, _end);
  7118. }
  7119. }
  7120. xpath_node_set::type_t type() const
  7121. {
  7122. return _type;
  7123. }
  7124. void set_type(xpath_node_set::type_t value)
  7125. {
  7126. _type = value;
  7127. }
  7128. };
  7129. PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
  7130. {
  7131. size_t capacity = static_cast<size_t>(_eos - _begin);
  7132. // get new capacity (1.5x rule)
  7133. size_t new_capacity = capacity + capacity / 2 + 1;
  7134. // reallocate the old array or allocate a new one
  7135. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
  7136. if (!data) return;
  7137. // finalize
  7138. _begin = data;
  7139. _end = data + capacity;
  7140. _eos = data + new_capacity;
  7141. // push
  7142. *_end++ = node;
  7143. }
  7144. PUGI__NS_END
  7145. PUGI__NS_BEGIN
  7146. struct xpath_context
  7147. {
  7148. xpath_node n;
  7149. size_t position, size;
  7150. xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
  7151. {
  7152. }
  7153. };
  // Token kinds produced by xpath_lexer::next().
  enum lexeme_t
  {
      lex_none = 0,             // no valid token (e.g. lone '!' or ':', unterminated string, unknown character)
      lex_equal,                // =
      lex_not_equal,            // !=
      lex_less,                 // <
      lex_greater,              // >
      lex_less_or_equal,        // <=
      lex_greater_or_equal,     // >=
      lex_plus,                 // +
      lex_minus,                // -
      lex_multiply,             // *
      lex_union,                // |
      lex_var_ref,              // $name or $prefix:name; contents hold the name without '$'
      lex_open_brace,           // (
      lex_close_brace,          // )
      lex_quoted_string,        // "..." or '...'; contents exclude the quotes
      lex_number,               // digits with optional fraction, or '.' followed by digits
      lex_slash,                // /
      lex_double_slash,         // //
      lex_open_square_brace,    // [
      lex_close_square_brace,   // ]
      lex_string,               // name, optionally followed by ":name" or ":*"
      lex_comma,                // ,
      lex_axis_attribute,       // @
      lex_dot,                  // .
      lex_double_dot,           // ..
      lex_double_colon,         // ::
      lex_eof                   // end of the query string
  };
  7184. struct xpath_lexer_string
  7185. {
  7186. const char_t* begin;
  7187. const char_t* end;
  7188. xpath_lexer_string(): begin(0), end(0)
  7189. {
  7190. }
  7191. bool operator==(const char_t* other) const
  7192. {
  7193. size_t length = static_cast<size_t>(end - begin);
  7194. return strequalrange(other, begin, length);
  7195. }
  7196. };
  7197. class xpath_lexer
  7198. {
  7199. const char_t* _cur;
  7200. const char_t* _cur_lexeme_pos;
  7201. xpath_lexer_string _cur_lexeme_contents;
  7202. lexeme_t _cur_lexeme;
  7203. public:
  7204. explicit xpath_lexer(const char_t* query): _cur(query)
  7205. {
  7206. next();
  7207. }
  7208. const char_t* state() const
  7209. {
  7210. return _cur;
  7211. }
  7212. void next()
  7213. {
  7214. const char_t* cur = _cur;
  7215. while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
  7216. // save lexeme position for error reporting
  7217. _cur_lexeme_pos = cur;
  7218. switch (*cur)
  7219. {
  7220. case 0:
  7221. _cur_lexeme = lex_eof;
  7222. break;
  7223. case '>':
  7224. if (*(cur+1) == '=')
  7225. {
  7226. cur += 2;
  7227. _cur_lexeme = lex_greater_or_equal;
  7228. }
  7229. else
  7230. {
  7231. cur += 1;
  7232. _cur_lexeme = lex_greater;
  7233. }
  7234. break;
  7235. case '<':
  7236. if (*(cur+1) == '=')
  7237. {
  7238. cur += 2;
  7239. _cur_lexeme = lex_less_or_equal;
  7240. }
  7241. else
  7242. {
  7243. cur += 1;
  7244. _cur_lexeme = lex_less;
  7245. }
  7246. break;
  7247. case '!':
  7248. if (*(cur+1) == '=')
  7249. {
  7250. cur += 2;
  7251. _cur_lexeme = lex_not_equal;
  7252. }
  7253. else
  7254. {
  7255. _cur_lexeme = lex_none;
  7256. }
  7257. break;
  7258. case '=':
  7259. cur += 1;
  7260. _cur_lexeme = lex_equal;
  7261. break;
  7262. case '+':
  7263. cur += 1;
  7264. _cur_lexeme = lex_plus;
  7265. break;
  7266. case '-':
  7267. cur += 1;
  7268. _cur_lexeme = lex_minus;
  7269. break;
  7270. case '*':
  7271. cur += 1;
  7272. _cur_lexeme = lex_multiply;
  7273. break;
  7274. case '|':
  7275. cur += 1;
  7276. _cur_lexeme = lex_union;
  7277. break;
  7278. case '$':
  7279. cur += 1;
  7280. if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7281. {
  7282. _cur_lexeme_contents.begin = cur;
  7283. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7284. if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
  7285. {
  7286. cur++; // :
  7287. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7288. }
  7289. _cur_lexeme_contents.end = cur;
  7290. _cur_lexeme = lex_var_ref;
  7291. }
  7292. else
  7293. {
  7294. _cur_lexeme = lex_none;
  7295. }
  7296. break;
  7297. case '(':
  7298. cur += 1;
  7299. _cur_lexeme = lex_open_brace;
  7300. break;
  7301. case ')':
  7302. cur += 1;
  7303. _cur_lexeme = lex_close_brace;
  7304. break;
  7305. case '[':
  7306. cur += 1;
  7307. _cur_lexeme = lex_open_square_brace;
  7308. break;
  7309. case ']':
  7310. cur += 1;
  7311. _cur_lexeme = lex_close_square_brace;
  7312. break;
  7313. case ',':
  7314. cur += 1;
  7315. _cur_lexeme = lex_comma;
  7316. break;
  7317. case '/':
  7318. if (*(cur+1) == '/')
  7319. {
  7320. cur += 2;
  7321. _cur_lexeme = lex_double_slash;
  7322. }
  7323. else
  7324. {
  7325. cur += 1;
  7326. _cur_lexeme = lex_slash;
  7327. }
  7328. break;
  7329. case '.':
  7330. if (*(cur+1) == '.')
  7331. {
  7332. cur += 2;
  7333. _cur_lexeme = lex_double_dot;
  7334. }
  7335. else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
  7336. {
  7337. _cur_lexeme_contents.begin = cur; // .
  7338. ++cur;
  7339. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7340. _cur_lexeme_contents.end = cur;
  7341. _cur_lexeme = lex_number;
  7342. }
  7343. else
  7344. {
  7345. cur += 1;
  7346. _cur_lexeme = lex_dot;
  7347. }
  7348. break;
  7349. case '@':
  7350. cur += 1;
  7351. _cur_lexeme = lex_axis_attribute;
  7352. break;
  7353. case '"':
  7354. case '\'':
  7355. {
  7356. char_t terminator = *cur;
  7357. ++cur;
  7358. _cur_lexeme_contents.begin = cur;
  7359. while (*cur && *cur != terminator) cur++;
  7360. _cur_lexeme_contents.end = cur;
  7361. if (!*cur)
  7362. _cur_lexeme = lex_none;
  7363. else
  7364. {
  7365. cur += 1;
  7366. _cur_lexeme = lex_quoted_string;
  7367. }
  7368. break;
  7369. }
  7370. case ':':
  7371. if (*(cur+1) == ':')
  7372. {
  7373. cur += 2;
  7374. _cur_lexeme = lex_double_colon;
  7375. }
  7376. else
  7377. {
  7378. _cur_lexeme = lex_none;
  7379. }
  7380. break;
  7381. default:
  7382. if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
  7383. {
  7384. _cur_lexeme_contents.begin = cur;
  7385. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7386. if (*cur == '.')
  7387. {
  7388. cur++;
  7389. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7390. }
  7391. _cur_lexeme_contents.end = cur;
  7392. _cur_lexeme = lex_number;
  7393. }
  7394. else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7395. {
  7396. _cur_lexeme_contents.begin = cur;
  7397. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7398. if (cur[0] == ':')
  7399. {
  7400. if (cur[1] == '*') // namespace test ncname:*
  7401. {
  7402. cur += 2; // :*
  7403. }
  7404. else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
  7405. {
  7406. cur++; // :
  7407. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7408. }
  7409. }
  7410. _cur_lexeme_contents.end = cur;
  7411. _cur_lexeme = lex_string;
  7412. }
  7413. else
  7414. {
  7415. _cur_lexeme = lex_none;
  7416. }
  7417. }
  7418. _cur = cur;
  7419. }
  7420. lexeme_t current() const
  7421. {
  7422. return _cur_lexeme;
  7423. }
  7424. const char_t* current_pos() const
  7425. {
  7426. return _cur_lexeme_pos;
  7427. }
  7428. const xpath_lexer_string& contents() const
  7429. {
  7430. assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
  7431. return _cur_lexeme_contents;
  7432. }
  7433. };
  // Kinds of XPath AST nodes. "left", "right" and "third" in the comments
  // refer to the operands of the node.
  enum ast_type_t
  {
      ast_unknown,

      // operators
      ast_op_or,                      // left or right
      ast_op_and,                     // left and right
      ast_op_equal,                   // left = right
      ast_op_not_equal,               // left != right
      ast_op_less,                    // left < right
      ast_op_greater,                 // left > right
      ast_op_less_or_equal,           // left <= right
      ast_op_greater_or_equal,        // left >= right
      ast_op_add,                     // left + right
      ast_op_subtract,                // left - right
      ast_op_multiply,                // left * right
      ast_op_divide,                  // left div right
      ast_op_mod,                     // left mod right
      ast_op_negate,                  // - left
      ast_op_union,                   // left | right

      // predicates and filters
      ast_predicate,                  // apply predicate to set; next points to next predicate
      ast_filter,                     // select * from left where right

      // constants and variables
      ast_string_constant,            // string constant
      ast_number_constant,            // number constant
      ast_variable,                   // variable

      // functions
      ast_func_last,                  // last()
      ast_func_position,              // position()
      ast_func_count,                 // count(left)
      ast_func_id,                    // id(left)
      ast_func_local_name_0,          // local-name()
      ast_func_local_name_1,          // local-name(left)
      ast_func_namespace_uri_0,       // namespace-uri()
      ast_func_namespace_uri_1,       // namespace-uri(left)
      ast_func_name_0,                // name()
      ast_func_name_1,                // name(left)
      ast_func_string_0,              // string()
      ast_func_string_1,              // string(left)
      ast_func_concat,                // concat(left, right, siblings)
      ast_func_starts_with,           // starts-with(left, right)
      ast_func_contains,              // contains(left, right)
      ast_func_substring_before,      // substring-before(left, right)
      ast_func_substring_after,       // substring-after(left, right)
      ast_func_substring_2,           // substring(left, right)
      ast_func_substring_3,           // substring(left, right, third)
      ast_func_string_length_0,       // string-length()
      ast_func_string_length_1,       // string-length(left)
      ast_func_normalize_space_0,     // normalize-space()
      ast_func_normalize_space_1,     // normalize-space(left)
      ast_func_translate,             // translate(left, right, third)
      ast_func_boolean,               // boolean(left)
      ast_func_not,                   // not(left)
      ast_func_true,                  // true()
      ast_func_false,                 // false()
      ast_func_lang,                  // lang(left)
      ast_func_number_0,              // number()
      ast_func_number_1,              // number(left)
      ast_func_sum,                   // sum(left)
      ast_func_floor,                 // floor(left)
      ast_func_ceiling,               // ceiling(left)
      ast_func_round,                 // round(left)

      // location steps
      ast_step,                       // process set left with step
      ast_step_root,                  // select root node

      // optimized forms
      ast_opt_translate_table,        // translate(left, right, third) where right/third are constants
      ast_opt_compare_attribute       // @name = 'string'
  };
  // Axes that a location step can traverse; step_fill switches over these values.
  enum axis_t
  {
      axis_ancestor,              // ancestor::
      axis_ancestor_or_self,      // ancestor-or-self::
      axis_attribute,             // attribute::
      axis_child,                 // child::
      axis_descendant,            // descendant::
      axis_descendant_or_self,    // descendant-or-self::
      axis_following,             // following::
      axis_following_sibling,     // following-sibling::
      axis_namespace,             // namespace::
      axis_parent,                // parent::
      axis_preceding,             // preceding::
      axis_preceding_sibling,     // preceding-sibling::
      axis_self                   // self::
  };
  // Node tests applied by step_push to decide whether a node or attribute is selected.
  enum nodetest_t
  {
      nodetest_none,              // no test; not handled by either step_push overload
      nodetest_name,              // element (or attribute) whose name equals the stored name
      nodetest_type_node,         // any node
      nodetest_type_comment,      // comment nodes
      nodetest_type_pi,           // processing instructions with any name
      nodetest_type_text,         // pcdata and cdata nodes
      nodetest_pi,                // processing instruction whose name equals the stored name
      nodetest_all,               // any element (or any attribute)
      nodetest_all_in_namespace   // element (or attribute) whose name starts with the stored prefix
  };
  // Predicate classification stored in a predicate/filter node and consulted by apply_predicate.
  enum predicate_t
  {
      predicate_default,          // no special handling; dispatched on the expression's return type
      predicate_posinv,           // presumably "position invariant"; not referenced in this section - TODO confirm
      predicate_constant,         // handled by apply_predicate_number_const (expression evaluated once)
      predicate_constant_one      // also handled by apply_predicate_number_const
  };
  // How much of a node set the caller needs; eval_once uses this to decide
  // whether evaluation may stop after the first match.
  enum nodeset_eval_t
  {
      nodeset_eval_all,           // the complete node set is required; never stops early
      nodeset_eval_any,           // may always stop after one match
      nodeset_eval_first          // may stop after one match only when the set is sorted
  };
  7538. template <axis_t N> struct axis_to_type
  7539. {
  7540. static const axis_t axis;
  7541. };
  7542. template <axis_t N> const axis_t axis_to_type<N>::axis = N;
  7543. class xpath_ast_node
  7544. {
  7545. private:
  7546. // node type
  7547. char _type;
  7548. char _rettype;
  7549. // for ast_step
  7550. char _axis;
  7551. // for ast_step/ast_predicate/ast_filter
  7552. char _test;
  7553. // tree node structure
  7554. xpath_ast_node* _left;
  7555. xpath_ast_node* _right;
  7556. xpath_ast_node* _next;
  7557. union
  7558. {
  7559. // value for ast_string_constant
  7560. const char_t* string;
  7561. // value for ast_number_constant
  7562. double number;
  7563. // variable for ast_variable
  7564. xpath_variable* variable;
  7565. // node test for ast_step (node name/namespace/node type/pi target)
  7566. const char_t* nodetest;
  7567. // table for ast_opt_translate_table
  7568. const unsigned char* table;
  7569. } _data;
  7570. xpath_ast_node(const xpath_ast_node&);
  7571. xpath_ast_node& operator=(const xpath_ast_node&);
  7572. template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7573. {
  7574. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7575. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7576. {
  7577. if (lt == xpath_type_boolean || rt == xpath_type_boolean)
  7578. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7579. else if (lt == xpath_type_number || rt == xpath_type_number)
  7580. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7581. else if (lt == xpath_type_string || rt == xpath_type_string)
  7582. {
  7583. xpath_allocator_capture cr(stack.result);
  7584. xpath_string ls = lhs->eval_string(c, stack);
  7585. xpath_string rs = rhs->eval_string(c, stack);
  7586. return comp(ls, rs);
  7587. }
  7588. }
  7589. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7590. {
  7591. xpath_allocator_capture cr(stack.result);
  7592. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7593. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7594. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7595. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7596. {
  7597. xpath_allocator_capture cri(stack.result);
  7598. if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
  7599. return true;
  7600. }
  7601. return false;
  7602. }
  7603. else
  7604. {
  7605. if (lt == xpath_type_node_set)
  7606. {
  7607. swap(lhs, rhs);
  7608. swap(lt, rt);
  7609. }
  7610. if (lt == xpath_type_boolean)
  7611. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7612. else if (lt == xpath_type_number)
  7613. {
  7614. xpath_allocator_capture cr(stack.result);
  7615. double l = lhs->eval_number(c, stack);
  7616. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7617. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7618. {
  7619. xpath_allocator_capture cri(stack.result);
  7620. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7621. return true;
  7622. }
  7623. return false;
  7624. }
  7625. else if (lt == xpath_type_string)
  7626. {
  7627. xpath_allocator_capture cr(stack.result);
  7628. xpath_string l = lhs->eval_string(c, stack);
  7629. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7630. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7631. {
  7632. xpath_allocator_capture cri(stack.result);
  7633. if (comp(l, string_value(*ri, stack.result)))
  7634. return true;
  7635. }
  7636. return false;
  7637. }
  7638. }
  7639. assert(false && "Wrong types"); // unreachable
  7640. return false;
  7641. }
  7642. static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
  7643. {
  7644. return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
  7645. }
  7646. template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7647. {
  7648. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7649. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7650. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7651. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7652. {
  7653. xpath_allocator_capture cr(stack.result);
  7654. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7655. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7656. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7657. {
  7658. xpath_allocator_capture cri(stack.result);
  7659. double l = convert_string_to_number(string_value(*li, stack.result).c_str());
  7660. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7661. {
  7662. xpath_allocator_capture crii(stack.result);
  7663. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7664. return true;
  7665. }
  7666. }
  7667. return false;
  7668. }
  7669. else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
  7670. {
  7671. xpath_allocator_capture cr(stack.result);
  7672. double l = lhs->eval_number(c, stack);
  7673. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7674. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7675. {
  7676. xpath_allocator_capture cri(stack.result);
  7677. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7678. return true;
  7679. }
  7680. return false;
  7681. }
  7682. else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
  7683. {
  7684. xpath_allocator_capture cr(stack.result);
  7685. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7686. double r = rhs->eval_number(c, stack);
  7687. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7688. {
  7689. xpath_allocator_capture cri(stack.result);
  7690. if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
  7691. return true;
  7692. }
  7693. return false;
  7694. }
  7695. else
  7696. {
  7697. assert(false && "Wrong types"); // unreachable
  7698. return false;
  7699. }
  7700. }
  7701. static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7702. {
  7703. assert(ns.size() >= first);
  7704. assert(expr->rettype() != xpath_type_number);
  7705. size_t i = 1;
  7706. size_t size = ns.size() - first;
  7707. xpath_node* last = ns.begin() + first;
  7708. // remove_if... or well, sort of
  7709. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7710. {
  7711. xpath_context c(*it, i, size);
  7712. if (expr->eval_boolean(c, stack))
  7713. {
  7714. *last++ = *it;
  7715. if (once) break;
  7716. }
  7717. }
  7718. ns.truncate(last);
  7719. }
  7720. static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7721. {
  7722. assert(ns.size() >= first);
  7723. assert(expr->rettype() == xpath_type_number);
  7724. size_t i = 1;
  7725. size_t size = ns.size() - first;
  7726. xpath_node* last = ns.begin() + first;
  7727. // remove_if... or well, sort of
  7728. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7729. {
  7730. xpath_context c(*it, i, size);
  7731. if (expr->eval_number(c, stack) == static_cast<double>(i))
  7732. {
  7733. *last++ = *it;
  7734. if (once) break;
  7735. }
  7736. }
  7737. ns.truncate(last);
  7738. }
  7739. static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
  7740. {
  7741. assert(ns.size() >= first);
  7742. assert(expr->rettype() == xpath_type_number);
  7743. size_t size = ns.size() - first;
  7744. xpath_node* last = ns.begin() + first;
  7745. xpath_context c(xpath_node(), 1, size);
  7746. double er = expr->eval_number(c, stack);
  7747. if (er >= 1.0 && er <= static_cast<double>(size))
  7748. {
  7749. size_t eri = static_cast<size_t>(er);
  7750. if (er == static_cast<double>(eri))
  7751. {
  7752. xpath_node r = last[eri - 1];
  7753. *last++ = r;
  7754. }
  7755. }
  7756. ns.truncate(last);
  7757. }
  7758. void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
  7759. {
  7760. if (ns.size() == first) return;
  7761. assert(_type == ast_filter || _type == ast_predicate);
  7762. if (_test == predicate_constant || _test == predicate_constant_one)
  7763. apply_predicate_number_const(ns, first, _right, stack);
  7764. else if (_right->rettype() == xpath_type_number)
  7765. apply_predicate_number(ns, first, _right, stack, once);
  7766. else
  7767. apply_predicate_boolean(ns, first, _right, stack, once);
  7768. }
  7769. void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
  7770. {
  7771. if (ns.size() == first) return;
  7772. bool last_once = eval_once(ns.type(), eval);
  7773. for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
  7774. pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
  7775. }
  7776. bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
  7777. {
  7778. assert(a);
  7779. const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
  7780. switch (_test)
  7781. {
  7782. case nodetest_name:
  7783. if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
  7784. {
  7785. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7786. return true;
  7787. }
  7788. break;
  7789. case nodetest_type_node:
  7790. case nodetest_all:
  7791. if (is_xpath_attribute(name))
  7792. {
  7793. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7794. return true;
  7795. }
  7796. break;
  7797. case nodetest_all_in_namespace:
  7798. if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
  7799. {
  7800. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7801. return true;
  7802. }
  7803. break;
  7804. default:
  7805. ;
  7806. }
  7807. return false;
  7808. }
  7809. bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
  7810. {
  7811. assert(n);
  7812. xml_node_type type = PUGI__NODETYPE(n);
  7813. switch (_test)
  7814. {
  7815. case nodetest_name:
  7816. if (type == node_element && n->name && strequal(n->name, _data.nodetest))
  7817. {
  7818. ns.push_back(xml_node(n), alloc);
  7819. return true;
  7820. }
  7821. break;
  7822. case nodetest_type_node:
  7823. ns.push_back(xml_node(n), alloc);
  7824. return true;
  7825. case nodetest_type_comment:
  7826. if (type == node_comment)
  7827. {
  7828. ns.push_back(xml_node(n), alloc);
  7829. return true;
  7830. }
  7831. break;
  7832. case nodetest_type_text:
  7833. if (type == node_pcdata || type == node_cdata)
  7834. {
  7835. ns.push_back(xml_node(n), alloc);
  7836. return true;
  7837. }
  7838. break;
  7839. case nodetest_type_pi:
  7840. if (type == node_pi)
  7841. {
  7842. ns.push_back(xml_node(n), alloc);
  7843. return true;
  7844. }
  7845. break;
  7846. case nodetest_pi:
  7847. if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
  7848. {
  7849. ns.push_back(xml_node(n), alloc);
  7850. return true;
  7851. }
  7852. break;
  7853. case nodetest_all:
  7854. if (type == node_element)
  7855. {
  7856. ns.push_back(xml_node(n), alloc);
  7857. return true;
  7858. }
  7859. break;
  7860. case nodetest_all_in_namespace:
  7861. if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
  7862. {
  7863. ns.push_back(xml_node(n), alloc);
  7864. return true;
  7865. }
  7866. break;
  7867. default:
  7868. assert(false && "Unknown axis"); // unreachable
  7869. }
  7870. return false;
  7871. }
  7872. template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
  7873. {
  7874. const axis_t axis = T::axis;
  7875. switch (axis)
  7876. {
  7877. case axis_attribute:
  7878. {
  7879. for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
  7880. if (step_push(ns, a, n, alloc) & once)
  7881. return;
  7882. break;
  7883. }
  7884. case axis_child:
  7885. {
  7886. for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
  7887. if (step_push(ns, c, alloc) & once)
  7888. return;
  7889. break;
  7890. }
  7891. case axis_descendant:
  7892. case axis_descendant_or_self:
  7893. {
  7894. if (axis == axis_descendant_or_self)
  7895. if (step_push(ns, n, alloc) & once)
  7896. return;
  7897. xml_node_struct* cur = n->first_child;
  7898. while (cur)
  7899. {
  7900. if (step_push(ns, cur, alloc) & once)
  7901. return;
  7902. if (cur->first_child)
  7903. cur = cur->first_child;
  7904. else
  7905. {
  7906. while (!cur->next_sibling)
  7907. {
  7908. cur = cur->parent;
  7909. if (cur == n) return;
  7910. }
  7911. cur = cur->next_sibling;
  7912. }
  7913. }
  7914. break;
  7915. }
  7916. case axis_following_sibling:
  7917. {
  7918. for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
  7919. if (step_push(ns, c, alloc) & once)
  7920. return;
  7921. break;
  7922. }
  7923. case axis_preceding_sibling:
  7924. {
  7925. for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
  7926. if (step_push(ns, c, alloc) & once)
  7927. return;
  7928. break;
  7929. }
  7930. case axis_following:
  7931. {
  7932. xml_node_struct* cur = n;
  7933. // exit from this node so that we don't include descendants
  7934. while (!cur->next_sibling)
  7935. {
  7936. cur = cur->parent;
  7937. if (!cur) return;
  7938. }
  7939. cur = cur->next_sibling;
  7940. while (cur)
  7941. {
  7942. if (step_push(ns, cur, alloc) & once)
  7943. return;
  7944. if (cur->first_child)
  7945. cur = cur->first_child;
  7946. else
  7947. {
  7948. while (!cur->next_sibling)
  7949. {
  7950. cur = cur->parent;
  7951. if (!cur) return;
  7952. }
  7953. cur = cur->next_sibling;
  7954. }
  7955. }
  7956. break;
  7957. }
  7958. case axis_preceding:
  7959. {
  7960. xml_node_struct* cur = n;
  7961. // exit from this node so that we don't include descendants
  7962. while (!cur->prev_sibling_c->next_sibling)
  7963. {
  7964. cur = cur->parent;
  7965. if (!cur) return;
  7966. }
  7967. cur = cur->prev_sibling_c;
  7968. while (cur)
  7969. {
  7970. if (cur->first_child)
  7971. cur = cur->first_child->prev_sibling_c;
  7972. else
  7973. {
  7974. // leaf node, can't be ancestor
  7975. if (step_push(ns, cur, alloc) & once)
  7976. return;
  7977. while (!cur->prev_sibling_c->next_sibling)
  7978. {
  7979. cur = cur->parent;
  7980. if (!cur) return;
  7981. if (!node_is_ancestor(cur, n))
  7982. if (step_push(ns, cur, alloc) & once)
  7983. return;
  7984. }
  7985. cur = cur->prev_sibling_c;
  7986. }
  7987. }
  7988. break;
  7989. }
  7990. case axis_ancestor:
  7991. case axis_ancestor_or_self:
  7992. {
  7993. if (axis == axis_ancestor_or_self)
  7994. if (step_push(ns, n, alloc) & once)
  7995. return;
  7996. xml_node_struct* cur = n->parent;
  7997. while (cur)
  7998. {
  7999. if (step_push(ns, cur, alloc) & once)
  8000. return;
  8001. cur = cur->parent;
  8002. }
  8003. break;
  8004. }
  8005. case axis_self:
  8006. {
  8007. step_push(ns, n, alloc);
  8008. break;
  8009. }
  8010. case axis_parent:
  8011. {
  8012. if (n->parent)
  8013. step_push(ns, n->parent, alloc);
  8014. break;
  8015. }
  8016. default:
  8017. assert(false && "Unimplemented axis"); // unreachable
  8018. }
  8019. }
  8020. template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
  8021. {
  8022. const axis_t axis = T::axis;
  8023. switch (axis)
  8024. {
  8025. case axis_ancestor:
  8026. case axis_ancestor_or_self:
  8027. {
  8028. if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
  8029. if (step_push(ns, a, p, alloc) & once)
  8030. return;
  8031. xml_node_struct* cur = p;
  8032. while (cur)
  8033. {
  8034. if (step_push(ns, cur, alloc) & once)
  8035. return;
  8036. cur = cur->parent;
  8037. }
  8038. break;
  8039. }
  8040. case axis_descendant_or_self:
  8041. case axis_self:
  8042. {
  8043. if (_test == nodetest_type_node) // reject attributes based on principal node type test
  8044. step_push(ns, a, p, alloc);
  8045. break;
  8046. }
  8047. case axis_following:
  8048. {
  8049. xml_node_struct* cur = p;
  8050. while (cur)
  8051. {
  8052. if (cur->first_child)
  8053. cur = cur->first_child;
  8054. else
  8055. {
  8056. while (!cur->next_sibling)
  8057. {
  8058. cur = cur->parent;
  8059. if (!cur) return;
  8060. }
  8061. cur = cur->next_sibling;
  8062. }
  8063. if (step_push(ns, cur, alloc) & once)
  8064. return;
  8065. }
  8066. break;
  8067. }
  8068. case axis_parent:
  8069. {
  8070. step_push(ns, p, alloc);
  8071. break;
  8072. }
  8073. case axis_preceding:
  8074. {
  8075. // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
  8076. step_fill(ns, p, alloc, once, v);
  8077. break;
  8078. }
  8079. default:
  8080. assert(false && "Unimplemented axis"); // unreachable
  8081. }
  8082. }
  8083. template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
  8084. {
  8085. const axis_t axis = T::axis;
  8086. const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
  8087. if (xn.node())
  8088. step_fill(ns, xn.node().internal_object(), alloc, once, v);
  8089. else if (axis_has_attributes && xn.attribute() && xn.parent())
  8090. step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
  8091. }
  8092. template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
  8093. {
  8094. const axis_t axis = T::axis;
  8095. const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
  8096. const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  8097. bool once =
  8098. (axis == axis_attribute && _test == nodetest_name) ||
  8099. (!_right && eval_once(axis_type, eval)) ||
  8100. // coverity[mixed_enums]
  8101. (_right && !_right->_next && _right->_test == predicate_constant_one);
  8102. xpath_node_set_raw ns;
  8103. ns.set_type(axis_type);
  8104. if (_left)
  8105. {
  8106. xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
  8107. // self axis preserves the original order
  8108. if (axis == axis_self) ns.set_type(s.type());
  8109. for (const xpath_node* it = s.begin(); it != s.end(); ++it)
  8110. {
  8111. size_t size = ns.size();
  8112. // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
  8113. if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
  8114. step_fill(ns, *it, stack.result, once, v);
  8115. if (_right) apply_predicates(ns, size, stack, eval);
  8116. }
  8117. }
  8118. else
  8119. {
  8120. step_fill(ns, c.n, stack.result, once, v);
  8121. if (_right) apply_predicates(ns, 0, stack, eval);
  8122. }
  8123. // child, attribute and self axes always generate unique set of nodes
  8124. // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
  8125. if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
  8126. ns.remove_duplicates(stack.temp);
  8127. return ns;
  8128. }
  8129. public:
  8130. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
  8131. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8132. {
  8133. assert(type == ast_string_constant);
  8134. _data.string = value;
  8135. }
  8136. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
  8137. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8138. {
  8139. assert(type == ast_number_constant);
  8140. _data.number = value;
  8141. }
  8142. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
  8143. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8144. {
  8145. assert(type == ast_variable);
  8146. _data.variable = value;
  8147. }
  8148. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
  8149. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
  8150. {
  8151. }
  8152. xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
  8153. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
  8154. {
  8155. assert(type == ast_step);
  8156. _data.nodetest = contents;
  8157. }
  8158. xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
  8159. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
  8160. {
  8161. assert(type == ast_filter || type == ast_predicate);
  8162. }
  8163. void set_next(xpath_ast_node* value)
  8164. {
  8165. _next = value;
  8166. }
  8167. void set_right(xpath_ast_node* value)
  8168. {
  8169. _right = value;
  8170. }
  8171. bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
  8172. {
  8173. switch (_type)
  8174. {
  8175. case ast_op_or:
  8176. return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
  8177. case ast_op_and:
  8178. return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
  8179. case ast_op_equal:
  8180. return compare_eq(_left, _right, c, stack, equal_to());
  8181. case ast_op_not_equal:
  8182. return compare_eq(_left, _right, c, stack, not_equal_to());
  8183. case ast_op_less:
  8184. return compare_rel(_left, _right, c, stack, less());
  8185. case ast_op_greater:
  8186. return compare_rel(_right, _left, c, stack, less());
  8187. case ast_op_less_or_equal:
  8188. return compare_rel(_left, _right, c, stack, less_equal());
  8189. case ast_op_greater_or_equal:
  8190. return compare_rel(_right, _left, c, stack, less_equal());
  8191. case ast_func_starts_with:
  8192. {
  8193. xpath_allocator_capture cr(stack.result);
  8194. xpath_string lr = _left->eval_string(c, stack);
  8195. xpath_string rr = _right->eval_string(c, stack);
  8196. return starts_with(lr.c_str(), rr.c_str());
  8197. }
  8198. case ast_func_contains:
  8199. {
  8200. xpath_allocator_capture cr(stack.result);
  8201. xpath_string lr = _left->eval_string(c, stack);
  8202. xpath_string rr = _right->eval_string(c, stack);
  8203. return find_substring(lr.c_str(), rr.c_str()) != 0;
  8204. }
  8205. case ast_func_boolean:
  8206. return _left->eval_boolean(c, stack);
  8207. case ast_func_not:
  8208. return !_left->eval_boolean(c, stack);
  8209. case ast_func_true:
  8210. return true;
  8211. case ast_func_false:
  8212. return false;
  8213. case ast_func_lang:
  8214. {
  8215. if (c.n.attribute()) return false;
  8216. xpath_allocator_capture cr(stack.result);
  8217. xpath_string lang = _left->eval_string(c, stack);
  8218. for (xml_node n = c.n.node(); n; n = n.parent())
  8219. {
  8220. xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
  8221. if (a)
  8222. {
  8223. const char_t* value = a.value();
  8224. // strnicmp / strncasecmp is not portable
  8225. for (const char_t* lit = lang.c_str(); *lit; ++lit)
  8226. {
  8227. if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
  8228. ++value;
  8229. }
  8230. return *value == 0 || *value == '-';
  8231. }
  8232. }
  8233. return false;
  8234. }
  8235. case ast_opt_compare_attribute:
  8236. {
  8237. const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
  8238. xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
  8239. return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
  8240. }
  8241. case ast_variable:
  8242. {
  8243. assert(_rettype == _data.variable->type());
  8244. if (_rettype == xpath_type_boolean)
  8245. return _data.variable->get_boolean();
  8246. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8247. break;
  8248. }
  8249. default:
  8250. ;
  8251. }
  8252. // none of the ast types that return the value directly matched, we need to perform type conversion
  8253. switch (_rettype)
  8254. {
  8255. case xpath_type_number:
  8256. return convert_number_to_boolean(eval_number(c, stack));
  8257. case xpath_type_string:
  8258. {
  8259. xpath_allocator_capture cr(stack.result);
  8260. return !eval_string(c, stack).empty();
  8261. }
  8262. case xpath_type_node_set:
  8263. {
  8264. xpath_allocator_capture cr(stack.result);
  8265. return !eval_node_set(c, stack, nodeset_eval_any).empty();
  8266. }
  8267. default:
  8268. assert(false && "Wrong expression for return type boolean"); // unreachable
  8269. return false;
  8270. }
  8271. }
  8272. double eval_number(const xpath_context& c, const xpath_stack& stack)
  8273. {
  8274. switch (_type)
  8275. {
  8276. case ast_op_add:
  8277. return _left->eval_number(c, stack) + _right->eval_number(c, stack);
  8278. case ast_op_subtract:
  8279. return _left->eval_number(c, stack) - _right->eval_number(c, stack);
  8280. case ast_op_multiply:
  8281. return _left->eval_number(c, stack) * _right->eval_number(c, stack);
  8282. case ast_op_divide:
  8283. return _left->eval_number(c, stack) / _right->eval_number(c, stack);
  8284. case ast_op_mod:
  8285. return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
  8286. case ast_op_negate:
  8287. return -_left->eval_number(c, stack);
  8288. case ast_number_constant:
  8289. return _data.number;
  8290. case ast_func_last:
  8291. return static_cast<double>(c.size);
  8292. case ast_func_position:
  8293. return static_cast<double>(c.position);
  8294. case ast_func_count:
  8295. {
  8296. xpath_allocator_capture cr(stack.result);
  8297. return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
  8298. }
  8299. case ast_func_string_length_0:
  8300. {
  8301. xpath_allocator_capture cr(stack.result);
  8302. return static_cast<double>(string_value(c.n, stack.result).length());
  8303. }
  8304. case ast_func_string_length_1:
  8305. {
  8306. xpath_allocator_capture cr(stack.result);
  8307. return static_cast<double>(_left->eval_string(c, stack).length());
  8308. }
  8309. case ast_func_number_0:
  8310. {
  8311. xpath_allocator_capture cr(stack.result);
  8312. return convert_string_to_number(string_value(c.n, stack.result).c_str());
  8313. }
  8314. case ast_func_number_1:
  8315. return _left->eval_number(c, stack);
  8316. case ast_func_sum:
  8317. {
  8318. xpath_allocator_capture cr(stack.result);
  8319. double r = 0;
  8320. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
  8321. for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
  8322. {
  8323. xpath_allocator_capture cri(stack.result);
  8324. r += convert_string_to_number(string_value(*it, stack.result).c_str());
  8325. }
  8326. return r;
  8327. }
  8328. case ast_func_floor:
  8329. {
  8330. double r = _left->eval_number(c, stack);
  8331. return r == r ? floor(r) : r;
  8332. }
  8333. case ast_func_ceiling:
  8334. {
  8335. double r = _left->eval_number(c, stack);
  8336. return r == r ? ceil(r) : r;
  8337. }
  8338. case ast_func_round:
  8339. return round_nearest_nzero(_left->eval_number(c, stack));
  8340. case ast_variable:
  8341. {
  8342. assert(_rettype == _data.variable->type());
  8343. if (_rettype == xpath_type_number)
  8344. return _data.variable->get_number();
  8345. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8346. break;
  8347. }
  8348. default:
  8349. ;
  8350. }
  8351. // none of the ast types that return the value directly matched, we need to perform type conversion
  8352. switch (_rettype)
  8353. {
  8354. case xpath_type_boolean:
  8355. return eval_boolean(c, stack) ? 1 : 0;
  8356. case xpath_type_string:
  8357. {
  8358. xpath_allocator_capture cr(stack.result);
  8359. return convert_string_to_number(eval_string(c, stack).c_str());
  8360. }
  8361. case xpath_type_node_set:
  8362. {
  8363. xpath_allocator_capture cr(stack.result);
  8364. return convert_string_to_number(eval_string(c, stack).c_str());
  8365. }
  8366. default:
  8367. assert(false && "Wrong expression for return type number"); // unreachable
  8368. return 0;
  8369. }
  8370. }
  8371. xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
  8372. {
  8373. assert(_type == ast_func_concat);
  8374. xpath_allocator_capture ct(stack.temp);
  8375. // count the string number
  8376. size_t count = 1;
  8377. for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
  8378. // allocate a buffer for temporary string objects
  8379. xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
  8380. if (!buffer) return xpath_string();
  8381. // evaluate all strings to temporary stack
  8382. xpath_stack swapped_stack = {stack.temp, stack.result};
  8383. buffer[0] = _left->eval_string(c, swapped_stack);
  8384. size_t pos = 1;
  8385. for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
  8386. assert(pos == count);
  8387. // get total length
  8388. size_t length = 0;
  8389. for (size_t i = 0; i < count; ++i) length += buffer[i].length();
  8390. // create final string
  8391. char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
  8392. if (!result) return xpath_string();
  8393. char_t* ri = result;
  8394. for (size_t j = 0; j < count; ++j)
  8395. for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
  8396. *ri++ = *bi;
  8397. *ri = 0;
  8398. return xpath_string::from_heap_preallocated(result, ri);
  8399. }
  8400. xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
  8401. {
  8402. switch (_type)
  8403. {
  8404. case ast_string_constant:
  8405. return xpath_string::from_const(_data.string);
  8406. case ast_func_local_name_0:
  8407. {
  8408. xpath_node na = c.n;
  8409. return xpath_string::from_const(local_name(na));
  8410. }
  8411. case ast_func_local_name_1:
  8412. {
  8413. xpath_allocator_capture cr(stack.result);
  8414. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8415. xpath_node na = ns.first();
  8416. return xpath_string::from_const(local_name(na));
  8417. }
  8418. case ast_func_name_0:
  8419. {
  8420. xpath_node na = c.n;
  8421. return xpath_string::from_const(qualified_name(na));
  8422. }
  8423. case ast_func_name_1:
  8424. {
  8425. xpath_allocator_capture cr(stack.result);
  8426. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8427. xpath_node na = ns.first();
  8428. return xpath_string::from_const(qualified_name(na));
  8429. }
  8430. case ast_func_namespace_uri_0:
  8431. {
  8432. xpath_node na = c.n;
  8433. return xpath_string::from_const(namespace_uri(na));
  8434. }
  8435. case ast_func_namespace_uri_1:
  8436. {
  8437. xpath_allocator_capture cr(stack.result);
  8438. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8439. xpath_node na = ns.first();
  8440. return xpath_string::from_const(namespace_uri(na));
  8441. }
  8442. case ast_func_string_0:
  8443. return string_value(c.n, stack.result);
  8444. case ast_func_string_1:
  8445. return _left->eval_string(c, stack);
  8446. case ast_func_concat:
  8447. return eval_string_concat(c, stack);
  8448. case ast_func_substring_before:
  8449. {
  8450. xpath_allocator_capture cr(stack.temp);
  8451. xpath_stack swapped_stack = {stack.temp, stack.result};
  8452. xpath_string s = _left->eval_string(c, swapped_stack);
  8453. xpath_string p = _right->eval_string(c, swapped_stack);
  8454. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8455. return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
  8456. }
  8457. case ast_func_substring_after:
  8458. {
  8459. xpath_allocator_capture cr(stack.temp);
  8460. xpath_stack swapped_stack = {stack.temp, stack.result};
  8461. xpath_string s = _left->eval_string(c, swapped_stack);
  8462. xpath_string p = _right->eval_string(c, swapped_stack);
  8463. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8464. if (!pos) return xpath_string();
  8465. const char_t* rbegin = pos + p.length();
  8466. const char_t* rend = s.c_str() + s.length();
  8467. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8468. }
  8469. case ast_func_substring_2:
  8470. {
  8471. xpath_allocator_capture cr(stack.temp);
  8472. xpath_stack swapped_stack = {stack.temp, stack.result};
  8473. xpath_string s = _left->eval_string(c, swapped_stack);
  8474. size_t s_length = s.length();
  8475. double first = round_nearest(_right->eval_number(c, stack));
  8476. if (is_nan(first)) return xpath_string(); // NaN
  8477. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8478. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8479. assert(1 <= pos && pos <= s_length + 1);
  8480. const char_t* rbegin = s.c_str() + (pos - 1);
  8481. const char_t* rend = s.c_str() + s.length();
  8482. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8483. }
  8484. case ast_func_substring_3:
  8485. {
  8486. xpath_allocator_capture cr(stack.temp);
  8487. xpath_stack swapped_stack = {stack.temp, stack.result};
  8488. xpath_string s = _left->eval_string(c, swapped_stack);
  8489. size_t s_length = s.length();
  8490. double first = round_nearest(_right->eval_number(c, stack));
  8491. double last = first + round_nearest(_right->_next->eval_number(c, stack));
  8492. if (is_nan(first) || is_nan(last)) return xpath_string();
  8493. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8494. else if (first >= last) return xpath_string();
  8495. else if (last < 1) return xpath_string();
  8496. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8497. size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
  8498. assert(1 <= pos && pos <= end && end <= s_length + 1);
  8499. const char_t* rbegin = s.c_str() + (pos - 1);
  8500. const char_t* rend = s.c_str() + (end - 1);
  8501. return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
  8502. }
  8503. case ast_func_normalize_space_0:
  8504. {
  8505. xpath_string s = string_value(c.n, stack.result);
  8506. char_t* begin = s.data(stack.result);
  8507. if (!begin) return xpath_string();
  8508. char_t* end = normalize_space(begin);
  8509. return xpath_string::from_heap_preallocated(begin, end);
  8510. }
  8511. case ast_func_normalize_space_1:
  8512. {
  8513. xpath_string s = _left->eval_string(c, stack);
  8514. char_t* begin = s.data(stack.result);
  8515. if (!begin) return xpath_string();
  8516. char_t* end = normalize_space(begin);
  8517. return xpath_string::from_heap_preallocated(begin, end);
  8518. }
  8519. case ast_func_translate:
  8520. {
  8521. xpath_allocator_capture cr(stack.temp);
  8522. xpath_stack swapped_stack = {stack.temp, stack.result};
  8523. xpath_string s = _left->eval_string(c, stack);
  8524. xpath_string from = _right->eval_string(c, swapped_stack);
  8525. xpath_string to = _right->_next->eval_string(c, swapped_stack);
  8526. char_t* begin = s.data(stack.result);
  8527. if (!begin) return xpath_string();
  8528. char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
  8529. return xpath_string::from_heap_preallocated(begin, end);
  8530. }
  8531. case ast_opt_translate_table:
  8532. {
  8533. xpath_string s = _left->eval_string(c, stack);
  8534. char_t* begin = s.data(stack.result);
  8535. if (!begin) return xpath_string();
  8536. char_t* end = translate_table(begin, _data.table);
  8537. return xpath_string::from_heap_preallocated(begin, end);
  8538. }
  8539. case ast_variable:
  8540. {
  8541. assert(_rettype == _data.variable->type());
  8542. if (_rettype == xpath_type_string)
  8543. return xpath_string::from_const(_data.variable->get_string());
  8544. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8545. break;
  8546. }
  8547. default:
  8548. ;
  8549. }
  8550. // none of the ast types that return the value directly matched, we need to perform type conversion
  8551. switch (_rettype)
  8552. {
  8553. case xpath_type_boolean:
  8554. return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
  8555. case xpath_type_number:
  8556. return convert_number_to_string(eval_number(c, stack), stack.result);
  8557. case xpath_type_node_set:
  8558. {
  8559. xpath_allocator_capture cr(stack.temp);
  8560. xpath_stack swapped_stack = {stack.temp, stack.result};
  8561. xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
  8562. return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
  8563. }
  8564. default:
  8565. assert(false && "Wrong expression for return type string"); // unreachable
  8566. return xpath_string();
  8567. }
  8568. }
  8569. xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
  8570. {
  8571. switch (_type)
  8572. {
  8573. case ast_op_union:
  8574. {
  8575. xpath_allocator_capture cr(stack.temp);
  8576. xpath_stack swapped_stack = {stack.temp, stack.result};
  8577. xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
  8578. xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
  8579. // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
  8580. ls.set_type(xpath_node_set::type_unsorted);
  8581. ls.append(rs.begin(), rs.end(), stack.result);
  8582. ls.remove_duplicates(stack.temp);
  8583. return ls;
  8584. }
  8585. case ast_filter:
  8586. {
  8587. xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
  8588. // either expression is a number or it contains position() call; sort by document order
  8589. if (_test != predicate_posinv) set.sort_do();
  8590. bool once = eval_once(set.type(), eval);
  8591. apply_predicate(set, 0, stack, once);
  8592. return set;
  8593. }
  8594. case ast_func_id:
  8595. return xpath_node_set_raw();
  8596. case ast_step:
  8597. {
  8598. switch (_axis)
  8599. {
  8600. case axis_ancestor:
  8601. return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
  8602. case axis_ancestor_or_self:
  8603. return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
  8604. case axis_attribute:
  8605. return step_do(c, stack, eval, axis_to_type<axis_attribute>());
  8606. case axis_child:
  8607. return step_do(c, stack, eval, axis_to_type<axis_child>());
  8608. case axis_descendant:
  8609. return step_do(c, stack, eval, axis_to_type<axis_descendant>());
  8610. case axis_descendant_or_self:
  8611. return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
  8612. case axis_following:
  8613. return step_do(c, stack, eval, axis_to_type<axis_following>());
  8614. case axis_following_sibling:
  8615. return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
  8616. case axis_namespace:
  8617. // namespaced axis is not supported
  8618. return xpath_node_set_raw();
  8619. case axis_parent:
  8620. return step_do(c, stack, eval, axis_to_type<axis_parent>());
  8621. case axis_preceding:
  8622. return step_do(c, stack, eval, axis_to_type<axis_preceding>());
  8623. case axis_preceding_sibling:
  8624. return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
  8625. case axis_self:
  8626. return step_do(c, stack, eval, axis_to_type<axis_self>());
  8627. default:
  8628. assert(false && "Unknown axis"); // unreachable
  8629. return xpath_node_set_raw();
  8630. }
  8631. }
  8632. case ast_step_root:
  8633. {
  8634. assert(!_right); // root step can't have any predicates
  8635. xpath_node_set_raw ns;
  8636. ns.set_type(xpath_node_set::type_sorted);
  8637. if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
  8638. else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
  8639. return ns;
  8640. }
  8641. case ast_variable:
  8642. {
  8643. assert(_rettype == _data.variable->type());
  8644. if (_rettype == xpath_type_node_set)
  8645. {
  8646. const xpath_node_set& s = _data.variable->get_node_set();
  8647. xpath_node_set_raw ns;
  8648. ns.set_type(s.type());
  8649. ns.append(s.begin(), s.end(), stack.result);
  8650. return ns;
  8651. }
  8652. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8653. break;
  8654. }
  8655. default:
  8656. ;
  8657. }
  8658. // none of the ast types that return the value directly matched, but conversions to node set are invalid
  8659. assert(false && "Wrong expression for return type node set"); // unreachable
  8660. return xpath_node_set_raw();
  8661. }
  8662. void optimize(xpath_allocator* alloc)
  8663. {
  8664. if (_left)
  8665. _left->optimize(alloc);
  8666. if (_right)
  8667. _right->optimize(alloc);
  8668. if (_next)
  8669. _next->optimize(alloc);
  8670. // coverity[var_deref_model]
  8671. optimize_self(alloc);
  8672. }
  8673. void optimize_self(xpath_allocator* alloc)
  8674. {
  8675. // Rewrite [position()=expr] with [expr]
  8676. // Note that this step has to go before classification to recognize [position()=1]
  8677. if ((_type == ast_filter || _type == ast_predicate) &&
  8678. _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8679. _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
  8680. {
  8681. _right = _right->_right;
  8682. }
  8683. // Classify filter/predicate ops to perform various optimizations during evaluation
  8684. if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8685. {
  8686. assert(_test == predicate_default);
  8687. if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
  8688. _test = predicate_constant_one;
  8689. else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
  8690. _test = predicate_constant;
  8691. else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
  8692. _test = predicate_posinv;
  8693. }
  8694. // Rewrite descendant-or-self::node()/child::foo with descendant::foo
  8695. // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
  8696. // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
  8697. // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
  8698. if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
  8699. _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
  8700. is_posinv_step())
  8701. {
  8702. if (_axis == axis_child || _axis == axis_descendant)
  8703. _axis = axis_descendant;
  8704. else
  8705. _axis = axis_descendant_or_self;
  8706. _left = _left->_left;
  8707. }
  8708. // Use optimized lookup table implementation for translate() with constant arguments
  8709. if (_type == ast_func_translate &&
  8710. _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
  8711. _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
  8712. {
  8713. unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
  8714. if (table)
  8715. {
  8716. _type = ast_opt_translate_table;
  8717. _data.table = table;
  8718. }
  8719. }
  8720. // Use optimized path for @attr = 'value' or @attr = $value
  8721. if (_type == ast_op_equal &&
  8722. _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
  8723. // coverity[mixed_enums]
  8724. _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
  8725. (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
  8726. {
  8727. _type = ast_opt_compare_attribute;
  8728. }
  8729. }
  8730. bool is_posinv_expr() const
  8731. {
  8732. switch (_type)
  8733. {
  8734. case ast_func_position:
  8735. case ast_func_last:
  8736. return false;
  8737. case ast_string_constant:
  8738. case ast_number_constant:
  8739. case ast_variable:
  8740. return true;
  8741. case ast_step:
  8742. case ast_step_root:
  8743. return true;
  8744. case ast_predicate:
  8745. case ast_filter:
  8746. return true;
  8747. default:
  8748. if (_left && !_left->is_posinv_expr()) return false;
  8749. for (xpath_ast_node* n = _right; n; n = n->_next)
  8750. if (!n->is_posinv_expr()) return false;
  8751. return true;
  8752. }
  8753. }
  8754. bool is_posinv_step() const
  8755. {
  8756. assert(_type == ast_step);
  8757. for (xpath_ast_node* n = _right; n; n = n->_next)
  8758. {
  8759. assert(n->_type == ast_predicate);
  8760. if (n->_test != predicate_posinv)
  8761. return false;
  8762. }
  8763. return true;
  8764. }
  8765. xpath_value_type rettype() const
  8766. {
  8767. return static_cast<xpath_value_type>(_rettype);
  8768. }
  8769. };
  // Limit that the parser's depth counter is checked against; exceeding it makes parsing fail
  // with "Exceeded maximum allowed query depth". Defaults to 1024 unless the build defines
  // PUGIXML_XPATH_DEPTH_LIMIT.
  #ifdef PUGIXML_XPATH_DEPTH_LIMIT
  static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
  #else
  static const size_t xpath_ast_depth_limit = 1024;
  #endif
  8777. struct xpath_parser
  8778. {
  8779. xpath_allocator* _alloc;
  8780. xpath_lexer _lexer;
  8781. const char_t* _query;
  8782. xpath_variable_set* _variables;
  8783. xpath_parse_result* _result;
  8784. char_t _scratch[32];
  8785. size_t _depth;
  8786. xpath_ast_node* error(const char* message)
  8787. {
  8788. _result->error = message;
  8789. _result->offset = _lexer.current_pos() - _query;
  8790. return 0;
  8791. }
  8792. xpath_ast_node* error_oom()
  8793. {
  8794. assert(_alloc->_error);
  8795. *_alloc->_error = true;
  8796. return 0;
  8797. }
  8798. xpath_ast_node* error_rec()
  8799. {
  8800. return error("Exceeded maximum allowed query depth");
  8801. }
  8802. void* alloc_node()
  8803. {
  8804. return _alloc->allocate(sizeof(xpath_ast_node));
  8805. }
  8806. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
  8807. {
  8808. void* memory = alloc_node();
  8809. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8810. }
  8811. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
  8812. {
  8813. void* memory = alloc_node();
  8814. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8815. }
  8816. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
  8817. {
  8818. void* memory = alloc_node();
  8819. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8820. }
  8821. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
  8822. {
  8823. void* memory = alloc_node();
  8824. return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
  8825. }
  8826. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
  8827. {
  8828. void* memory = alloc_node();
  8829. return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
  8830. }
  8831. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
  8832. {
  8833. void* memory = alloc_node();
  8834. return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
  8835. }
  8836. const char_t* alloc_string(const xpath_lexer_string& value)
  8837. {
  8838. if (!value.begin)
  8839. return PUGIXML_TEXT("");
  8840. size_t length = static_cast<size_t>(value.end - value.begin);
  8841. char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
  8842. if (!c) return 0;
  8843. memcpy(c, value.begin, length * sizeof(char_t));
  8844. c[length] = 0;
  8845. return c;
  8846. }
  8847. xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
  8848. {
  8849. switch (name.begin[0])
  8850. {
  8851. case 'b':
  8852. if (name == PUGIXML_TEXT("boolean") && argc == 1)
  8853. return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
  8854. break;
  8855. case 'c':
  8856. if (name == PUGIXML_TEXT("count") && argc == 1)
  8857. {
  8858. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8859. return alloc_node(ast_func_count, xpath_type_number, args[0]);
  8860. }
  8861. else if (name == PUGIXML_TEXT("contains") && argc == 2)
  8862. return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
  8863. else if (name == PUGIXML_TEXT("concat") && argc >= 2)
  8864. return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
  8865. else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
  8866. return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
  8867. break;
  8868. case 'f':
  8869. if (name == PUGIXML_TEXT("false") && argc == 0)
  8870. return alloc_node(ast_func_false, xpath_type_boolean);
  8871. else if (name == PUGIXML_TEXT("floor") && argc == 1)
  8872. return alloc_node(ast_func_floor, xpath_type_number, args[0]);
  8873. break;
  8874. case 'i':
  8875. if (name == PUGIXML_TEXT("id") && argc == 1)
  8876. return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
  8877. break;
  8878. case 'l':
  8879. if (name == PUGIXML_TEXT("last") && argc == 0)
  8880. return alloc_node(ast_func_last, xpath_type_number);
  8881. else if (name == PUGIXML_TEXT("lang") && argc == 1)
  8882. return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
  8883. else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
  8884. {
  8885. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8886. return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
  8887. }
  8888. break;
  8889. case 'n':
  8890. if (name == PUGIXML_TEXT("name") && argc <= 1)
  8891. {
  8892. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8893. return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
  8894. }
  8895. else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
  8896. {
  8897. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8898. return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
  8899. }
  8900. else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
  8901. return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
  8902. else if (name == PUGIXML_TEXT("not") && argc == 1)
  8903. return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
  8904. else if (name == PUGIXML_TEXT("number") && argc <= 1)
  8905. return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
  8906. break;
  8907. case 'p':
  8908. if (name == PUGIXML_TEXT("position") && argc == 0)
  8909. return alloc_node(ast_func_position, xpath_type_number);
  8910. break;
  8911. case 'r':
  8912. if (name == PUGIXML_TEXT("round") && argc == 1)
  8913. return alloc_node(ast_func_round, xpath_type_number, args[0]);
  8914. break;
  8915. case 's':
  8916. if (name == PUGIXML_TEXT("string") && argc <= 1)
  8917. return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
  8918. else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
  8919. return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
  8920. else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
  8921. return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
  8922. else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
  8923. return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
  8924. else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
  8925. return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
  8926. else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
  8927. return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
  8928. else if (name == PUGIXML_TEXT("sum") && argc == 1)
  8929. {
  8930. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8931. return alloc_node(ast_func_sum, xpath_type_number, args[0]);
  8932. }
  8933. break;
  8934. case 't':
  8935. if (name == PUGIXML_TEXT("translate") && argc == 3)
  8936. return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
  8937. else if (name == PUGIXML_TEXT("true") && argc == 0)
  8938. return alloc_node(ast_func_true, xpath_type_boolean);
  8939. break;
  8940. default:
  8941. break;
  8942. }
  8943. return error("Unrecognized function or wrong parameter count");
  8944. }
  8945. axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
  8946. {
  8947. specified = true;
  8948. switch (name.begin[0])
  8949. {
  8950. case 'a':
  8951. if (name == PUGIXML_TEXT("ancestor"))
  8952. return axis_ancestor;
  8953. else if (name == PUGIXML_TEXT("ancestor-or-self"))
  8954. return axis_ancestor_or_self;
  8955. else if (name == PUGIXML_TEXT("attribute"))
  8956. return axis_attribute;
  8957. break;
  8958. case 'c':
  8959. if (name == PUGIXML_TEXT("child"))
  8960. return axis_child;
  8961. break;
  8962. case 'd':
  8963. if (name == PUGIXML_TEXT("descendant"))
  8964. return axis_descendant;
  8965. else if (name == PUGIXML_TEXT("descendant-or-self"))
  8966. return axis_descendant_or_self;
  8967. break;
  8968. case 'f':
  8969. if (name == PUGIXML_TEXT("following"))
  8970. return axis_following;
  8971. else if (name == PUGIXML_TEXT("following-sibling"))
  8972. return axis_following_sibling;
  8973. break;
  8974. case 'n':
  8975. if (name == PUGIXML_TEXT("namespace"))
  8976. return axis_namespace;
  8977. break;
  8978. case 'p':
  8979. if (name == PUGIXML_TEXT("parent"))
  8980. return axis_parent;
  8981. else if (name == PUGIXML_TEXT("preceding"))
  8982. return axis_preceding;
  8983. else if (name == PUGIXML_TEXT("preceding-sibling"))
  8984. return axis_preceding_sibling;
  8985. break;
  8986. case 's':
  8987. if (name == PUGIXML_TEXT("self"))
  8988. return axis_self;
  8989. break;
  8990. default:
  8991. break;
  8992. }
  8993. specified = false;
  8994. return axis_child;
  8995. }
  8996. nodetest_t parse_node_test_type(const xpath_lexer_string& name)
  8997. {
  8998. switch (name.begin[0])
  8999. {
  9000. case 'c':
  9001. if (name == PUGIXML_TEXT("comment"))
  9002. return nodetest_type_comment;
  9003. break;
  9004. case 'n':
  9005. if (name == PUGIXML_TEXT("node"))
  9006. return nodetest_type_node;
  9007. break;
  9008. case 'p':
  9009. if (name == PUGIXML_TEXT("processing-instruction"))
  9010. return nodetest_type_pi;
  9011. break;
  9012. case 't':
  9013. if (name == PUGIXML_TEXT("text"))
  9014. return nodetest_type_text;
  9015. break;
  9016. default:
  9017. break;
  9018. }
  9019. return nodetest_none;
  9020. }
  9021. // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
  9022. xpath_ast_node* parse_primary_expression()
  9023. {
  9024. switch (_lexer.current())
  9025. {
  9026. case lex_var_ref:
  9027. {
  9028. xpath_lexer_string name = _lexer.contents();
  9029. if (!_variables)
  9030. return error("Unknown variable: variable set is not provided");
  9031. xpath_variable* var = 0;
  9032. if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
  9033. return error_oom();
  9034. if (!var)
  9035. return error("Unknown variable: variable set does not contain the given name");
  9036. _lexer.next();
  9037. return alloc_node(ast_variable, var->type(), var);
  9038. }
  9039. case lex_open_brace:
  9040. {
  9041. _lexer.next();
  9042. xpath_ast_node* n = parse_expression();
  9043. if (!n) return 0;
  9044. if (_lexer.current() != lex_close_brace)
  9045. return error("Expected ')' to match an opening '('");
  9046. _lexer.next();
  9047. return n;
  9048. }
  9049. case lex_quoted_string:
  9050. {
  9051. const char_t* value = alloc_string(_lexer.contents());
  9052. if (!value) return 0;
  9053. _lexer.next();
  9054. return alloc_node(ast_string_constant, xpath_type_string, value);
  9055. }
  9056. case lex_number:
  9057. {
  9058. double value = 0;
  9059. if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
  9060. return error_oom();
  9061. _lexer.next();
  9062. return alloc_node(ast_number_constant, xpath_type_number, value);
  9063. }
  9064. case lex_string:
  9065. {
  9066. xpath_ast_node* args[2] = {0};
  9067. size_t argc = 0;
  9068. xpath_lexer_string function = _lexer.contents();
  9069. _lexer.next();
  9070. xpath_ast_node* last_arg = 0;
  9071. if (_lexer.current() != lex_open_brace)
  9072. return error("Unrecognized function call");
  9073. _lexer.next();
  9074. size_t old_depth = _depth;
  9075. while (_lexer.current() != lex_close_brace)
  9076. {
  9077. if (argc > 0)
  9078. {
  9079. if (_lexer.current() != lex_comma)
  9080. return error("No comma between function arguments");
  9081. _lexer.next();
  9082. }
  9083. if (++_depth > xpath_ast_depth_limit)
  9084. return error_rec();
  9085. xpath_ast_node* n = parse_expression();
  9086. if (!n) return 0;
  9087. if (argc < 2) args[argc] = n;
  9088. else last_arg->set_next(n);
  9089. argc++;
  9090. last_arg = n;
  9091. }
  9092. _lexer.next();
  9093. _depth = old_depth;
  9094. return parse_function(function, argc, args);
  9095. }
  9096. default:
  9097. return error("Unrecognizable primary expression");
  9098. }
  9099. }
  9100. // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
  9101. // Predicate ::= '[' PredicateExpr ']'
  9102. // PredicateExpr ::= Expr
  9103. xpath_ast_node* parse_filter_expression()
  9104. {
  9105. xpath_ast_node* n = parse_primary_expression();
  9106. if (!n) return 0;
  9107. size_t old_depth = _depth;
  9108. while (_lexer.current() == lex_open_square_brace)
  9109. {
  9110. _lexer.next();
  9111. if (++_depth > xpath_ast_depth_limit)
  9112. return error_rec();
  9113. if (n->rettype() != xpath_type_node_set)
  9114. return error("Predicate has to be applied to node set");
  9115. xpath_ast_node* expr = parse_expression();
  9116. if (!expr) return 0;
  9117. n = alloc_node(ast_filter, n, expr, predicate_default);
  9118. if (!n) return 0;
  9119. if (_lexer.current() != lex_close_square_brace)
  9120. return error("Expected ']' to match an opening '['");
  9121. _lexer.next();
  9122. }
  9123. _depth = old_depth;
  9124. return n;
  9125. }
  9126. // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
  9127. // AxisSpecifier ::= AxisName '::' | '@'?
  9128. // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
  9129. // NameTest ::= '*' | NCName ':' '*' | QName
  9130. // AbbreviatedStep ::= '.' | '..'
  9131. xpath_ast_node* parse_step(xpath_ast_node* set)
  9132. {
  9133. if (set && set->rettype() != xpath_type_node_set)
  9134. return error("Step has to be applied to node set");
  9135. bool axis_specified = false;
  9136. axis_t axis = axis_child; // implied child axis
  9137. if (_lexer.current() == lex_axis_attribute)
  9138. {
  9139. axis = axis_attribute;
  9140. axis_specified = true;
  9141. _lexer.next();
  9142. }
  9143. else if (_lexer.current() == lex_dot)
  9144. {
  9145. _lexer.next();
  9146. if (_lexer.current() == lex_open_square_brace)
  9147. return error("Predicates are not allowed after an abbreviated step");
  9148. return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
  9149. }
  9150. else if (_lexer.current() == lex_double_dot)
  9151. {
  9152. _lexer.next();
  9153. if (_lexer.current() == lex_open_square_brace)
  9154. return error("Predicates are not allowed after an abbreviated step");
  9155. return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
  9156. }
  9157. nodetest_t nt_type = nodetest_none;
  9158. xpath_lexer_string nt_name;
  9159. if (_lexer.current() == lex_string)
  9160. {
  9161. // node name test
  9162. nt_name = _lexer.contents();
  9163. _lexer.next();
  9164. // was it an axis name?
  9165. if (_lexer.current() == lex_double_colon)
  9166. {
  9167. // parse axis name
  9168. if (axis_specified)
  9169. return error("Two axis specifiers in one step");
  9170. axis = parse_axis_name(nt_name, axis_specified);
  9171. if (!axis_specified)
  9172. return error("Unknown axis");
  9173. // read actual node test
  9174. _lexer.next();
  9175. if (_lexer.current() == lex_multiply)
  9176. {
  9177. nt_type = nodetest_all;
  9178. nt_name = xpath_lexer_string();
  9179. _lexer.next();
  9180. }
  9181. else if (_lexer.current() == lex_string)
  9182. {
  9183. nt_name = _lexer.contents();
  9184. _lexer.next();
  9185. }
  9186. else
  9187. {
  9188. return error("Unrecognized node test");
  9189. }
  9190. }
  9191. if (nt_type == nodetest_none)
  9192. {
  9193. // node type test or processing-instruction
  9194. if (_lexer.current() == lex_open_brace)
  9195. {
  9196. _lexer.next();
  9197. if (_lexer.current() == lex_close_brace)
  9198. {
  9199. _lexer.next();
  9200. nt_type = parse_node_test_type(nt_name);
  9201. if (nt_type == nodetest_none)
  9202. return error("Unrecognized node type");
  9203. nt_name = xpath_lexer_string();
  9204. }
  9205. else if (nt_name == PUGIXML_TEXT("processing-instruction"))
  9206. {
  9207. if (_lexer.current() != lex_quoted_string)
  9208. return error("Only literals are allowed as arguments to processing-instruction()");
  9209. nt_type = nodetest_pi;
  9210. nt_name = _lexer.contents();
  9211. _lexer.next();
  9212. if (_lexer.current() != lex_close_brace)
  9213. return error("Unmatched brace near processing-instruction()");
  9214. _lexer.next();
  9215. }
  9216. else
  9217. {
  9218. return error("Unmatched brace near node type test");
  9219. }
  9220. }
  9221. // QName or NCName:*
  9222. else
  9223. {
  9224. if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
  9225. {
  9226. nt_name.end--; // erase *
  9227. nt_type = nodetest_all_in_namespace;
  9228. }
  9229. else
  9230. {
  9231. nt_type = nodetest_name;
  9232. }
  9233. }
  9234. }
  9235. }
  9236. else if (_lexer.current() == lex_multiply)
  9237. {
  9238. nt_type = nodetest_all;
  9239. _lexer.next();
  9240. }
  9241. else
  9242. {
  9243. return error("Unrecognized node test");
  9244. }
  9245. const char_t* nt_name_copy = alloc_string(nt_name);
  9246. if (!nt_name_copy) return 0;
  9247. xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
  9248. if (!n) return 0;
  9249. size_t old_depth = _depth;
  9250. xpath_ast_node* last = 0;
  9251. while (_lexer.current() == lex_open_square_brace)
  9252. {
  9253. _lexer.next();
  9254. if (++_depth > xpath_ast_depth_limit)
  9255. return error_rec();
  9256. xpath_ast_node* expr = parse_expression();
  9257. if (!expr) return 0;
  9258. xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
  9259. if (!pred) return 0;
  9260. if (_lexer.current() != lex_close_square_brace)
  9261. return error("Expected ']' to match an opening '['");
  9262. _lexer.next();
  9263. if (last) last->set_next(pred);
  9264. else n->set_right(pred);
  9265. last = pred;
  9266. }
  9267. _depth = old_depth;
  9268. return n;
  9269. }
  9270. // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
  9271. xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
  9272. {
  9273. xpath_ast_node* n = parse_step(set);
  9274. if (!n) return 0;
  9275. size_t old_depth = _depth;
  9276. while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9277. {
  9278. lexeme_t l = _lexer.current();
  9279. _lexer.next();
  9280. if (l == lex_double_slash)
  9281. {
  9282. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9283. if (!n) return 0;
  9284. ++_depth;
  9285. }
  9286. if (++_depth > xpath_ast_depth_limit)
  9287. return error_rec();
  9288. n = parse_step(n);
  9289. if (!n) return 0;
  9290. }
  9291. _depth = old_depth;
  9292. return n;
  9293. }
  9294. // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
  9295. // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
  9296. xpath_ast_node* parse_location_path()
  9297. {
  9298. if (_lexer.current() == lex_slash)
  9299. {
  9300. _lexer.next();
  9301. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9302. if (!n) return 0;
  9303. // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
  9304. lexeme_t l = _lexer.current();
  9305. if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
  9306. return parse_relative_location_path(n);
  9307. else
  9308. return n;
  9309. }
  9310. else if (_lexer.current() == lex_double_slash)
  9311. {
  9312. _lexer.next();
  9313. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9314. if (!n) return 0;
  9315. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9316. if (!n) return 0;
  9317. return parse_relative_location_path(n);
  9318. }
  9319. // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
  9320. return parse_relative_location_path(0);
  9321. }
  9322. // PathExpr ::= LocationPath
  9323. // | FilterExpr
  9324. // | FilterExpr '/' RelativeLocationPath
  9325. // | FilterExpr '//' RelativeLocationPath
  9326. // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
  9327. // UnaryExpr ::= UnionExpr | '-' UnaryExpr
  9328. xpath_ast_node* parse_path_or_unary_expression()
  9329. {
  9330. // Clarification.
  9331. // PathExpr begins with either LocationPath or FilterExpr.
  9332. // FilterExpr begins with PrimaryExpr
  9333. // PrimaryExpr begins with '$' in case of it being a variable reference,
  9334. // '(' in case of it being an expression, string literal, number constant or
  9335. // function call.
  9336. if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
  9337. _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
  9338. _lexer.current() == lex_string)
  9339. {
  9340. if (_lexer.current() == lex_string)
  9341. {
  9342. // This is either a function call, or not - if not, we shall proceed with location path
  9343. const char_t* state = _lexer.state();
  9344. while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
  9345. if (*state != '(')
  9346. return parse_location_path();
  9347. // This looks like a function call; however this still can be a node-test. Check it.
  9348. if (parse_node_test_type(_lexer.contents()) != nodetest_none)
  9349. return parse_location_path();
  9350. }
  9351. xpath_ast_node* n = parse_filter_expression();
  9352. if (!n) return 0;
  9353. if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9354. {
  9355. lexeme_t l = _lexer.current();
  9356. _lexer.next();
  9357. if (l == lex_double_slash)
  9358. {
  9359. if (n->rettype() != xpath_type_node_set)
  9360. return error("Step has to be applied to node set");
  9361. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9362. if (!n) return 0;
  9363. }
  9364. // select from location path
  9365. return parse_relative_location_path(n);
  9366. }
  9367. return n;
  9368. }
  9369. else if (_lexer.current() == lex_minus)
  9370. {
  9371. _lexer.next();
  9372. // precedence 7+ - only parses union expressions
  9373. xpath_ast_node* n = parse_expression(7);
  9374. if (!n) return 0;
  9375. return alloc_node(ast_op_negate, xpath_type_number, n);
  9376. }
  9377. else
  9378. {
  9379. return parse_location_path();
  9380. }
  9381. }
  9382. struct binary_op_t
  9383. {
  9384. ast_type_t asttype;
  9385. xpath_value_type rettype;
  9386. int precedence;
  9387. binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
  9388. {
  9389. }
  9390. binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
  9391. {
  9392. }
  9393. static binary_op_t parse(xpath_lexer& lexer)
  9394. {
  9395. switch (lexer.current())
  9396. {
  9397. case lex_string:
  9398. if (lexer.contents() == PUGIXML_TEXT("or"))
  9399. return binary_op_t(ast_op_or, xpath_type_boolean, 1);
  9400. else if (lexer.contents() == PUGIXML_TEXT("and"))
  9401. return binary_op_t(ast_op_and, xpath_type_boolean, 2);
  9402. else if (lexer.contents() == PUGIXML_TEXT("div"))
  9403. return binary_op_t(ast_op_divide, xpath_type_number, 6);
  9404. else if (lexer.contents() == PUGIXML_TEXT("mod"))
  9405. return binary_op_t(ast_op_mod, xpath_type_number, 6);
  9406. else
  9407. return binary_op_t();
  9408. case lex_equal:
  9409. return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
  9410. case lex_not_equal:
  9411. return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
  9412. case lex_less:
  9413. return binary_op_t(ast_op_less, xpath_type_boolean, 4);
  9414. case lex_greater:
  9415. return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
  9416. case lex_less_or_equal:
  9417. return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
  9418. case lex_greater_or_equal:
  9419. return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
  9420. case lex_plus:
  9421. return binary_op_t(ast_op_add, xpath_type_number, 5);
  9422. case lex_minus:
  9423. return binary_op_t(ast_op_subtract, xpath_type_number, 5);
  9424. case lex_multiply:
  9425. return binary_op_t(ast_op_multiply, xpath_type_number, 6);
  9426. case lex_union:
  9427. return binary_op_t(ast_op_union, xpath_type_node_set, 7);
  9428. default:
  9429. return binary_op_t();
  9430. }
  9431. }
  9432. };
  9433. xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
  9434. {
  9435. binary_op_t op = binary_op_t::parse(_lexer);
  9436. while (op.asttype != ast_unknown && op.precedence >= limit)
  9437. {
  9438. _lexer.next();
  9439. if (++_depth > xpath_ast_depth_limit)
  9440. return error_rec();
  9441. xpath_ast_node* rhs = parse_path_or_unary_expression();
  9442. if (!rhs) return 0;
  9443. binary_op_t nextop = binary_op_t::parse(_lexer);
  9444. while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
  9445. {
  9446. rhs = parse_expression_rec(rhs, nextop.precedence);
  9447. if (!rhs) return 0;
  9448. nextop = binary_op_t::parse(_lexer);
  9449. }
  9450. if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
  9451. return error("Union operator has to be applied to node sets");
  9452. lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
  9453. if (!lhs) return 0;
  9454. op = binary_op_t::parse(_lexer);
  9455. }
  9456. return lhs;
  9457. }
  9458. // Expr ::= OrExpr
  9459. // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
  9460. // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
  9461. // EqualityExpr ::= RelationalExpr
  9462. // | EqualityExpr '=' RelationalExpr
  9463. // | EqualityExpr '!=' RelationalExpr
  9464. // RelationalExpr ::= AdditiveExpr
  9465. // | RelationalExpr '<' AdditiveExpr
  9466. // | RelationalExpr '>' AdditiveExpr
  9467. // | RelationalExpr '<=' AdditiveExpr
  9468. // | RelationalExpr '>=' AdditiveExpr
  9469. // AdditiveExpr ::= MultiplicativeExpr
  9470. // | AdditiveExpr '+' MultiplicativeExpr
  9471. // | AdditiveExpr '-' MultiplicativeExpr
  9472. // MultiplicativeExpr ::= UnaryExpr
  9473. // | MultiplicativeExpr '*' UnaryExpr
  9474. // | MultiplicativeExpr 'div' UnaryExpr
  9475. // | MultiplicativeExpr 'mod' UnaryExpr
  9476. xpath_ast_node* parse_expression(int limit = 0)
  9477. {
  9478. size_t old_depth = _depth;
  9479. if (++_depth > xpath_ast_depth_limit)
  9480. return error_rec();
  9481. xpath_ast_node* n = parse_path_or_unary_expression();
  9482. if (!n) return 0;
  9483. n = parse_expression_rec(n, limit);
  9484. _depth = old_depth;
  9485. return n;
  9486. }
  9487. xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
  9488. {
  9489. }
  9490. xpath_ast_node* parse()
  9491. {
  9492. xpath_ast_node* n = parse_expression();
  9493. if (!n) return 0;
  9494. assert(_depth == 0);
  9495. // check if there are unparsed tokens left
  9496. if (_lexer.current() != lex_eof)
  9497. return error("Incorrect query");
  9498. return n;
  9499. }
  9500. static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
  9501. {
  9502. xpath_parser parser(query, variables, alloc, result);
  9503. return parser.parse();
  9504. }
  9505. };
  9506. struct xpath_query_impl
  9507. {
  9508. static xpath_query_impl* create()
  9509. {
  9510. void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
  9511. if (!memory) return 0;
  9512. return new (memory) xpath_query_impl();
  9513. }
  9514. static void destroy(xpath_query_impl* impl)
  9515. {
  9516. // free all allocated pages
  9517. impl->alloc.release();
  9518. // free allocator memory (with the first page)
  9519. xml_memory::deallocate(impl);
  9520. }
  9521. xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
  9522. {
  9523. block.next = 0;
  9524. block.capacity = sizeof(block.data);
  9525. }
  9526. xpath_ast_node* root;
  9527. xpath_allocator alloc;
  9528. xpath_memory_block block;
  9529. bool oom;
  9530. };
  9531. PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
  9532. {
  9533. if (!impl) return 0;
  9534. if (impl->root->rettype() != xpath_type_node_set)
  9535. {
  9536. #ifdef PUGIXML_NO_EXCEPTIONS
  9537. return 0;
  9538. #else
  9539. xpath_parse_result res;
  9540. res.error = "Expression does not evaluate to node set";
  9541. throw xpath_exception(res);
  9542. #endif
  9543. }
  9544. return impl->root;
  9545. }
  9546. PUGI__NS_END
  9547. namespace pugi
  9548. {
  9549. #ifndef PUGIXML_NO_EXCEPTIONS
  9550. PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
  9551. {
  9552. assert(_result.error);
  9553. }
  9554. PUGI__FN const char* xpath_exception::what() const throw()
  9555. {
  9556. return _result.error;
  9557. }
  9558. PUGI__FN const xpath_parse_result& xpath_exception::result() const
  9559. {
  9560. return _result;
  9561. }
  9562. #endif
  9563. PUGI__FN xpath_node::xpath_node()
  9564. {
  9565. }
  9566. PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
  9567. {
  9568. }
  9569. PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
  9570. {
  9571. }
  9572. PUGI__FN xml_node xpath_node::node() const
  9573. {
  9574. return _attribute ? xml_node() : _node;
  9575. }
  9576. PUGI__FN xml_attribute xpath_node::attribute() const
  9577. {
  9578. return _attribute;
  9579. }
  9580. PUGI__FN xml_node xpath_node::parent() const
  9581. {
  9582. return _attribute ? _node : _node.parent();
  9583. }
  9584. PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
  9585. {
  9586. }
  9587. PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
  9588. {
  9589. return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
  9590. }
  9591. PUGI__FN bool xpath_node::operator!() const
  9592. {
  9593. return !(_node || _attribute);
  9594. }
  9595. PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
  9596. {
  9597. return _node == n._node && _attribute == n._attribute;
  9598. }
  9599. PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
  9600. {
  9601. return _node != n._node || _attribute != n._attribute;
  9602. }
  9603. #ifdef __BORLANDC__
  9604. PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
  9605. {
  9606. return (bool)lhs && rhs;
  9607. }
  9608. PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
  9609. {
  9610. return (bool)lhs || rhs;
  9611. }
  9612. #endif
  9613. PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
  9614. {
  9615. assert(begin_ <= end_);
  9616. size_t size_ = static_cast<size_t>(end_ - begin_);
  9617. // use internal buffer for 0 or 1 elements, heap buffer otherwise
  9618. xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
  9619. if (!storage)
  9620. {
  9621. #ifdef PUGIXML_NO_EXCEPTIONS
  9622. return;
  9623. #else
  9624. throw std::bad_alloc();
  9625. #endif
  9626. }
  9627. // deallocate old buffer
  9628. if (_begin != _storage)
  9629. impl::xml_memory::deallocate(_begin);
  9630. // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
  9631. if (size_)
  9632. memcpy(storage, begin_, size_ * sizeof(xpath_node));
  9633. _begin = storage;
  9634. _end = storage + size_;
  9635. _type = type_;
  9636. }
  9637. #ifdef PUGIXML_HAS_MOVE
  9638. PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
  9639. {
  9640. _type = rhs._type;
  9641. _storage[0] = rhs._storage[0];
  9642. _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
  9643. _end = _begin + (rhs._end - rhs._begin);
  9644. rhs._type = type_unsorted;
  9645. rhs._begin = rhs._storage;
  9646. rhs._end = rhs._storage;
  9647. }
  9648. #endif
  9649. PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
  9650. {
  9651. }
  9652. PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
  9653. {
  9654. _assign(begin_, end_, type_);
  9655. }
  9656. PUGI__FN xpath_node_set::~xpath_node_set()
  9657. {
  9658. if (_begin != _storage)
  9659. impl::xml_memory::deallocate(_begin);
  9660. }
  9661. PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
  9662. {
  9663. _assign(ns._begin, ns._end, ns._type);
  9664. }
  9665. PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
  9666. {
  9667. if (this == &ns) return *this;
  9668. _assign(ns._begin, ns._end, ns._type);
  9669. return *this;
  9670. }
  9671. #ifdef PUGIXML_HAS_MOVE
  9672. PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
  9673. {
  9674. _move(rhs);
  9675. }
  9676. PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
  9677. {
  9678. if (this == &rhs) return *this;
  9679. if (_begin != _storage)
  9680. impl::xml_memory::deallocate(_begin);
  9681. _move(rhs);
  9682. return *this;
  9683. }
  9684. #endif
  9685. PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
  9686. {
  9687. return _type;
  9688. }
  9689. PUGI__FN size_t xpath_node_set::size() const
  9690. {
  9691. return _end - _begin;
  9692. }
  9693. PUGI__FN bool xpath_node_set::empty() const
  9694. {
  9695. return _begin == _end;
  9696. }
  9697. PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
  9698. {
  9699. assert(index < size());
  9700. return _begin[index];
  9701. }
  9702. PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
  9703. {
  9704. return _begin;
  9705. }
  9706. PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
  9707. {
  9708. return _end;
  9709. }
  9710. PUGI__FN void xpath_node_set::sort(bool reverse)
  9711. {
  9712. _type = impl::xpath_sort(_begin, _end, _type, reverse);
  9713. }
  9714. PUGI__FN xpath_node xpath_node_set::first() const
  9715. {
  9716. return impl::xpath_first(_begin, _end, _type);
  9717. }
  9718. PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
  9719. {
  9720. }
  9721. PUGI__FN xpath_parse_result::operator bool() const
  9722. {
  9723. return error == 0;
  9724. }
  9725. PUGI__FN const char* xpath_parse_result::description() const
  9726. {
  9727. return error ? error : "No error";
  9728. }
  9729. PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
  9730. {
  9731. }
  9732. PUGI__FN const char_t* xpath_variable::name() const
  9733. {
  9734. switch (_type)
  9735. {
  9736. case xpath_type_node_set:
  9737. return static_cast<const impl::xpath_variable_node_set*>(this)->name;
  9738. case xpath_type_number:
  9739. return static_cast<const impl::xpath_variable_number*>(this)->name;
  9740. case xpath_type_string:
  9741. return static_cast<const impl::xpath_variable_string*>(this)->name;
  9742. case xpath_type_boolean:
  9743. return static_cast<const impl::xpath_variable_boolean*>(this)->name;
  9744. default:
  9745. assert(false && "Invalid variable type"); // unreachable
  9746. return 0;
  9747. }
  9748. }
  9749. PUGI__FN xpath_value_type xpath_variable::type() const
  9750. {
  9751. return _type;
  9752. }
  9753. PUGI__FN bool xpath_variable::get_boolean() const
  9754. {
  9755. return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
  9756. }
  9757. PUGI__FN double xpath_variable::get_number() const
  9758. {
  9759. return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
  9760. }
  9761. PUGI__FN const char_t* xpath_variable::get_string() const
  9762. {
  9763. const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
  9764. return value ? value : PUGIXML_TEXT("");
  9765. }
  9766. PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
  9767. {
  9768. return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
  9769. }
  9770. PUGI__FN bool xpath_variable::set(bool value)
  9771. {
  9772. if (_type != xpath_type_boolean) return false;
  9773. static_cast<impl::xpath_variable_boolean*>(this)->value = value;
  9774. return true;
  9775. }
  9776. PUGI__FN bool xpath_variable::set(double value)
  9777. {
  9778. if (_type != xpath_type_number) return false;
  9779. static_cast<impl::xpath_variable_number*>(this)->value = value;
  9780. return true;
  9781. }
  9782. PUGI__FN bool xpath_variable::set(const char_t* value)
  9783. {
  9784. if (_type != xpath_type_string) return false;
  9785. impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
  9786. // duplicate string
  9787. size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
  9788. char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
  9789. if (!copy) return false;
  9790. memcpy(copy, value, size);
  9791. // replace old string
  9792. if (var->value) impl::xml_memory::deallocate(var->value);
  9793. var->value = copy;
  9794. return true;
  9795. }
  9796. PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
  9797. {
  9798. if (_type != xpath_type_node_set) return false;
  9799. static_cast<impl::xpath_variable_node_set*>(this)->value = value;
  9800. return true;
  9801. }
  9802. PUGI__FN xpath_variable_set::xpath_variable_set()
  9803. {
  9804. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9805. _data[i] = 0;
  9806. }
  9807. PUGI__FN xpath_variable_set::~xpath_variable_set()
  9808. {
  9809. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9810. _destroy(_data[i]);
  9811. }
  9812. PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
  9813. {
  9814. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9815. _data[i] = 0;
  9816. _assign(rhs);
  9817. }
  9818. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
  9819. {
  9820. if (this == &rhs) return *this;
  9821. _assign(rhs);
  9822. return *this;
  9823. }
  9824. #ifdef PUGIXML_HAS_MOVE
  9825. PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9826. {
  9827. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9828. {
  9829. _data[i] = rhs._data[i];
  9830. rhs._data[i] = 0;
  9831. }
  9832. }
  9833. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9834. {
  9835. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9836. {
  9837. _destroy(_data[i]);
  9838. _data[i] = rhs._data[i];
  9839. rhs._data[i] = 0;
  9840. }
  9841. return *this;
  9842. }
  9843. #endif
  9844. PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
  9845. {
  9846. xpath_variable_set temp;
  9847. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9848. if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
  9849. return;
  9850. _swap(temp);
  9851. }
  9852. PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
  9853. {
  9854. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9855. {
  9856. xpath_variable* chain = _data[i];
  9857. _data[i] = rhs._data[i];
  9858. rhs._data[i] = chain;
  9859. }
  9860. }
  9861. PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
  9862. {
  9863. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  9864. size_t hash = impl::hash_string(name) % hash_size;
  9865. // look for existing variable
  9866. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  9867. if (impl::strequal(var->name(), name))
  9868. return var;
  9869. return 0;
  9870. }
  9871. PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
  9872. {
  9873. xpath_variable* last = 0;
  9874. while (var)
  9875. {
  9876. // allocate storage for new variable
  9877. xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
  9878. if (!nvar) return false;
  9879. // link the variable to the result immediately to handle failures gracefully
  9880. if (last)
  9881. last->_next = nvar;
  9882. else
  9883. *out_result = nvar;
  9884. last = nvar;
  9885. // copy the value; this can fail due to out-of-memory conditions
  9886. if (!impl::copy_xpath_variable(nvar, var)) return false;
  9887. var = var->_next;
  9888. }
  9889. return true;
  9890. }
  9891. PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
  9892. {
  9893. while (var)
  9894. {
  9895. xpath_variable* next = var->_next;
  9896. impl::delete_xpath_variable(var->_type, var);
  9897. var = next;
  9898. }
  9899. }
  9900. PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
  9901. {
  9902. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  9903. size_t hash = impl::hash_string(name) % hash_size;
  9904. // look for existing variable
  9905. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  9906. if (impl::strequal(var->name(), name))
  9907. return var->type() == type ? var : 0;
  9908. // add new variable
  9909. xpath_variable* result = impl::new_xpath_variable(type, name);
  9910. if (result)
  9911. {
  9912. result->_next = _data[hash];
  9913. _data[hash] = result;
  9914. }
  9915. return result;
  9916. }
  9917. PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
  9918. {
  9919. xpath_variable* var = add(name, xpath_type_boolean);
  9920. return var ? var->set(value) : false;
  9921. }
  9922. PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
  9923. {
  9924. xpath_variable* var = add(name, xpath_type_number);
  9925. return var ? var->set(value) : false;
  9926. }
  9927. PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
  9928. {
  9929. xpath_variable* var = add(name, xpath_type_string);
  9930. return var ? var->set(value) : false;
  9931. }
  9932. PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
  9933. {
  9934. xpath_variable* var = add(name, xpath_type_node_set);
  9935. return var ? var->set(value) : false;
  9936. }
  9937. PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
  9938. {
  9939. return _find(name);
  9940. }
  9941. PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
  9942. {
  9943. return _find(name);
  9944. }
  9945. PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
  9946. {
  9947. impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
  9948. if (!qimpl)
  9949. {
  9950. #ifdef PUGIXML_NO_EXCEPTIONS
  9951. _result.error = "Out of memory";
  9952. #else
  9953. throw std::bad_alloc();
  9954. #endif
  9955. }
  9956. else
  9957. {
  9958. using impl::auto_deleter; // MSVC7 workaround
  9959. auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
  9960. qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
  9961. if (qimpl->root)
  9962. {
  9963. qimpl->root->optimize(&qimpl->alloc);
  9964. _impl = impl.release();
  9965. _result.error = 0;
  9966. }
  9967. else
  9968. {
  9969. #ifdef PUGIXML_NO_EXCEPTIONS
  9970. if (qimpl->oom) _result.error = "Out of memory";
  9971. #else
  9972. if (qimpl->oom) throw std::bad_alloc();
  9973. throw xpath_exception(_result);
  9974. #endif
  9975. }
  9976. }
  9977. }
  9978. PUGI__FN xpath_query::xpath_query(): _impl(0)
  9979. {
  9980. }
  9981. PUGI__FN xpath_query::~xpath_query()
  9982. {
  9983. if (_impl)
  9984. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  9985. }
  9986. #ifdef PUGIXML_HAS_MOVE
  9987. PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
  9988. {
  9989. _impl = rhs._impl;
  9990. _result = rhs._result;
  9991. rhs._impl = 0;
  9992. rhs._result = xpath_parse_result();
  9993. }
  9994. PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
  9995. {
  9996. if (this == &rhs) return *this;
  9997. if (_impl)
  9998. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  9999. _impl = rhs._impl;
  10000. _result = rhs._result;
  10001. rhs._impl = 0;
  10002. rhs._result = xpath_parse_result();
  10003. return *this;
  10004. }
  10005. #endif
  10006. PUGI__FN xpath_value_type xpath_query::return_type() const
  10007. {
  10008. if (!_impl) return xpath_type_none;
  10009. return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
  10010. }
  10011. PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
  10012. {
  10013. if (!_impl) return false;
  10014. impl::xpath_context c(n, 1, 1);
  10015. impl::xpath_stack_data sd;
  10016. bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
  10017. if (sd.oom)
  10018. {
  10019. #ifdef PUGIXML_NO_EXCEPTIONS
  10020. return false;
  10021. #else
  10022. throw std::bad_alloc();
  10023. #endif
  10024. }
  10025. return r;
  10026. }
  10027. PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
  10028. {
  10029. if (!_impl) return impl::gen_nan();
  10030. impl::xpath_context c(n, 1, 1);
  10031. impl::xpath_stack_data sd;
  10032. double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
  10033. if (sd.oom)
  10034. {
  10035. #ifdef PUGIXML_NO_EXCEPTIONS
  10036. return impl::gen_nan();
  10037. #else
  10038. throw std::bad_alloc();
  10039. #endif
  10040. }
  10041. return r;
  10042. }
  10043. #ifndef PUGIXML_NO_STL
  10044. PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
  10045. {
  10046. if (!_impl) return string_t();
  10047. impl::xpath_context c(n, 1, 1);
  10048. impl::xpath_stack_data sd;
  10049. impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
  10050. if (sd.oom)
  10051. {
  10052. #ifdef PUGIXML_NO_EXCEPTIONS
  10053. return string_t();
  10054. #else
  10055. throw std::bad_alloc();
  10056. #endif
  10057. }
  10058. return string_t(r.c_str(), r.length());
  10059. }
  10060. #endif
  10061. PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
  10062. {
  10063. impl::xpath_context c(n, 1, 1);
  10064. impl::xpath_stack_data sd;
  10065. impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
  10066. if (sd.oom)
  10067. {
  10068. #ifdef PUGIXML_NO_EXCEPTIONS
  10069. r = impl::xpath_string();
  10070. #else
  10071. throw std::bad_alloc();
  10072. #endif
  10073. }
  10074. size_t full_size = r.length() + 1;
  10075. if (capacity > 0)
  10076. {
  10077. size_t size = (full_size < capacity) ? full_size : capacity;
  10078. assert(size > 0);
  10079. memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
  10080. buffer[size - 1] = 0;
  10081. }
  10082. return full_size;
  10083. }
  10084. PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
  10085. {
  10086. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10087. if (!root) return xpath_node_set();
  10088. impl::xpath_context c(n, 1, 1);
  10089. impl::xpath_stack_data sd;
  10090. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
  10091. if (sd.oom)
  10092. {
  10093. #ifdef PUGIXML_NO_EXCEPTIONS
  10094. return xpath_node_set();
  10095. #else
  10096. throw std::bad_alloc();
  10097. #endif
  10098. }
  10099. return xpath_node_set(r.begin(), r.end(), r.type());
  10100. }
  10101. PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
  10102. {
  10103. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10104. if (!root) return xpath_node();
  10105. impl::xpath_context c(n, 1, 1);
  10106. impl::xpath_stack_data sd;
  10107. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
  10108. if (sd.oom)
  10109. {
  10110. #ifdef PUGIXML_NO_EXCEPTIONS
  10111. return xpath_node();
  10112. #else
  10113. throw std::bad_alloc();
  10114. #endif
  10115. }
  10116. return r.first();
  10117. }
  10118. PUGI__FN const xpath_parse_result& xpath_query::result() const
  10119. {
  10120. return _result;
  10121. }
  10122. PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
  10123. {
  10124. }
  10125. PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
  10126. {
  10127. return _impl ? unspecified_bool_xpath_query : 0;
  10128. }
  10129. PUGI__FN bool xpath_query::operator!() const
  10130. {
  10131. return !_impl;
  10132. }
  10133. PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
  10134. {
  10135. xpath_query q(query, variables);
  10136. return q.evaluate_node(*this);
  10137. }
  10138. PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
  10139. {
  10140. return query.evaluate_node(*this);
  10141. }
  10142. PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
  10143. {
  10144. xpath_query q(query, variables);
  10145. return q.evaluate_node_set(*this);
  10146. }
  10147. PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
  10148. {
  10149. return query.evaluate_node_set(*this);
  10150. }
  10151. PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
  10152. {
  10153. xpath_query q(query, variables);
  10154. return q.evaluate_node(*this);
  10155. }
  10156. PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
  10157. {
  10158. return query.evaluate_node(*this);
  10159. }
  10160. }
  10161. #endif
  10162. #ifdef __BORLANDC__
  10163. # pragma option pop
  10164. #endif
  10165. // Intel C++ does not properly keep warning state for function templates,
  10166. // so popping warning state at the end of translation unit leads to warnings in the middle.
  10167. #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
  10168. # pragma warning(pop)
  10169. #endif
  10170. #if defined(_MSC_VER) && defined(__c2__)
  10171. # pragma clang diagnostic pop
  10172. #endif
  10173. // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
  10174. #undef PUGI__NO_INLINE
  10175. #undef PUGI__UNLIKELY
  10176. #undef PUGI__STATIC_ASSERT
  10177. #undef PUGI__DMC_VOLATILE
  10178. #undef PUGI__UNSIGNED_OVERFLOW
  10179. #undef PUGI__MSVC_CRT_VERSION
  10180. #undef PUGI__SNPRINTF
  10181. #undef PUGI__NS_BEGIN
  10182. #undef PUGI__NS_END
  10183. #undef PUGI__FN
  10184. #undef PUGI__FN_NO_INLINE
  10185. #undef PUGI__GETHEADER_IMPL
  10186. #undef PUGI__GETPAGE_IMPL
  10187. #undef PUGI__GETPAGE
  10188. #undef PUGI__NODETYPE
  10189. #undef PUGI__IS_CHARTYPE_IMPL
  10190. #undef PUGI__IS_CHARTYPE
  10191. #undef PUGI__IS_CHARTYPEX
  10192. #undef PUGI__ENDSWITH
  10193. #undef PUGI__SKIPWS
  10194. #undef PUGI__OPTSET
  10195. #undef PUGI__PUSHNODE
  10196. #undef PUGI__POPNODE
  10197. #undef PUGI__SCANFOR
  10198. #undef PUGI__SCANWHILE
  10199. #undef PUGI__SCANWHILE_UNROLL
  10200. #undef PUGI__ENDSEG
  10201. #undef PUGI__THROW_ERROR
  10202. #undef PUGI__CHECK_ERROR
  10203. #endif
  10204. /**
  10205. * Copyright (c) 2006-2022 Arseny Kapoulkine
  10206. *
  10207. * Permission is hereby granted, free of charge, to any person
  10208. * obtaining a copy of this software and associated documentation
  10209. * files (the "Software"), to deal in the Software without
  10210. * restriction, including without limitation the rights to use,
  10211. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10212. * copies of the Software, and to permit persons to whom the
  10213. * Software is furnished to do so, subject to the following
  10214. * conditions:
  10215. *
  10216. * The above copyright notice and this permission notice shall be
  10217. * included in all copies or substantial portions of the Software.
  10218. *
  10219. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  10220. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  10221. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  10222. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  10223. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  10224. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  10225. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  10226. * OTHER DEALINGS IN THE SOFTWARE.
  10227. */