Nick Kralevich | 0f17c8f | 2014-09-27 12:41:49 -0700 | [diff] [blame] | 1 | { |
| 2 | pcRegExp - Perl compatible regular expressions for Virtual Pascal |
| 3 | (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com> |
| 4 | |
| 5 | Based on PCRE library interface unit for Virtual Pascal. |
| 6 | (c) 2001 Alexander Tokarev <dwalin@dwalin.ru> |
| 7 | |
| 8 | The current PCRE version is: 3.7 |
| 9 | |
| 10 | This software may be distributed under the terms of the modified BSD license |
| 11 | Copyright (c) 2001, Alexander Tokarev |
| 12 | All rights reserved. |
| 13 | |
| 14 | Redistribution and use in source and binary forms, with or without |
| 15 | modification, are permitted provided that the following conditions are met: |
| 16 | |
| 17 | * Redistributions of source code must retain the above copyright notice, |
| 18 | this list of conditions and the following disclaimer. |
| 19 | * Redistributions in binary form must reproduce the above copyright notice, |
| 20 | this list of conditions and the following disclaimer in the documentation |
| 21 | and/or other materials provided with the distribution. |
| 22 | * Neither the name of the <ORGANIZATION> nor the names of its contributors |
| 23 | may be used to endorse or promote products derived from this software without |
| 24 | specific prior written permission. |
| 25 | |
| 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| 27 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 28 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 29 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| 30 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 31 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 32 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 33 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 34 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 36 | |
| 37 | The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk> |
| 38 | Copyright (c) 1997-2004 University of Cambridge |
| 39 | |
| 40 | AngelsHolocaust 4-11-04 updated to use version v5.0 |
| 41 | (INFO: this is regex-directed, NFA) |
| 42 | AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now |
| 43 | everything works as it should (no more crashes) |
| 44 | -> removed CheckRegExp because pcre handles errors perfectly |
| 45 | 10-11-04 - added pcError (errorhandling), pcInit |
| 46 | 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset |
| 47 | 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr |
| 48 | 17-02-06 - added RunTimeOptions: caller can set options while searching |
| 49 | 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset |
| 50 | into the string itself |
| 51 | 20-12-06 - support for version 7.0 |
| 52 | 27.08.08 - support for v7.7 |
| 53 | } |
| 54 | |
| 55 | {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7} |
| 56 | |
| 57 | Unit pcregexp; |
| 58 | |
| 59 | Interface |
| 60 | |
| 61 | uses objects; |
| 62 | |
| 63 | Type |
| 64 | PpcRegExp = ^TpcRegExp; |
| 65 | // TpcRegExp = object |
| 66 | TpcRegExp = object(TObject) |
| 67 | MatchesCount: integer; |
| 68 | RegExpC, RegExpExt : Pointer; |
| 69 | Matches:Pointer; |
| 70 | RegExp: shortstring; |
| 71 | SourceLen: integer; |
| 72 | PartialMatch : boolean; |
| 73 | Error : boolean; |
| 74 | ErrorMsg : Pchar; |
| 75 | ErrorPos : integer; |
| 76 | RunTimeOptions: Integer; // options which can be set by the caller |
| 77 | constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer); |
| 78 | function Search(AStr: Pchar; ALen : longint) : boolean; virtual; |
| 79 | function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual; |
| 80 | function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual; |
| 81 | function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual; |
| 82 | function MatchFull(var Pos, Len : longint) : boolean; virtual; |
| 83 | function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual; |
| 84 | function GetFullStr(AStr: Pchar) : string; virtual; |
| 85 | function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual; |
| 86 | function GetPreSubStr(AStr: Pchar) : string; virtual; |
| 87 | function GetPostSubStr(AStr: Pchar) : string; virtual; |
| 88 | function ErrorStr : string; virtual; |
| 89 | destructor Done; virtual; |
| 90 | end; |
| 91 | |
| 92 | function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean; |
| 93 | function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string; |
| 94 | |
| 95 | function pcFastGrepMatch(WildCard, aStr: string): Boolean; |
| 96 | function pcFastGrepSub(WildCard, aStr, aRepl: string): string; |
| 97 | |
| 98 | {$IFDEF PCRE_5_0} |
| 99 | function pcGetVersion : pchar; |
| 100 | {$ENDIF} |
| 101 | |
| 102 | function pcError (var pRegExp : Pointer) : Boolean; |
| 103 | function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer; |
| 104 | |
| 105 | Const { Options } |
| 106 | PCRE_CASELESS = $0001; |
| 107 | PCRE_MULTILINE = $0002; |
| 108 | PCRE_DOTALL = $0004; |
| 109 | PCRE_EXTENDED = $0008; |
| 110 | PCRE_ANCHORED = $0010; |
| 111 | PCRE_DOLLAR_ENDONLY = $0020; |
| 112 | PCRE_EXTRA = $0040; |
| 113 | PCRE_NOTBOL = $0080; |
| 114 | PCRE_NOTEOL = $0100; |
| 115 | PCRE_UNGREEDY = $0200; |
| 116 | PCRE_NOTEMPTY = $0400; |
| 117 | {$IFDEF PCRE_5_0} |
| 118 | PCRE_UTF8 = $0800; |
| 119 | PCRE_NO_AUTO_CAPTURE = $1000; |
| 120 | PCRE_NO_UTF8_CHECK = $2000; |
| 121 | PCRE_AUTO_CALLOUT = $4000; |
| 122 | PCRE_PARTIAL = $8000; |
| 123 | {$ENDIF} |
| 124 | {$IFDEF PCRE_7_0} |
| 125 | PCRE_DFA_SHORTEST = $00010000; |
| 126 | PCRE_DFA_RESTART = $00020000; |
| 127 | PCRE_FIRSTLINE = $00040000; |
| 128 | PCRE_DUPNAMES = $00080000; |
| 129 | PCRE_NEWLINE_CR = $00100000; |
| 130 | PCRE_NEWLINE_LF = $00200000; |
| 131 | PCRE_NEWLINE_CRLF = $00300000; |
| 132 | PCRE_NEWLINE_ANY = $00400000; |
| 133 | PCRE_NEWLINE_ANYCRLF = $00500000; |
| 134 | |
| 135 | PCRE_NEWLINE_BITS = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY; |
| 136 | |
| 137 | {$ENDIF} |
| 138 | {$IFDEF PCRE_7_7} |
| 139 | PCRE_BSR_ANYCRLF = $00800000; |
| 140 | PCRE_BSR_UNICODE = $01000000; |
| 141 | PCRE_JAVASCRIPT_COMPAT= $02000000; |
| 142 | {$ENDIF} |
| 143 | |
| 144 | PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_AUTO_CALLOUT + PCRE_CASELESS + |
| 145 | PCRE_DOLLAR_ENDONLY + PCRE_DOTALL + PCRE_EXTENDED + |
| 146 | PCRE_EXTRA + PCRE_MULTILINE + PCRE_NO_AUTO_CAPTURE + |
| 147 | PCRE_UNGREEDY + PCRE_UTF8 + PCRE_NO_UTF8_CHECK |
| 148 | {$IFDEF PCRE_7_0} |
| 149 | + PCRE_DUPNAMES + PCRE_FIRSTLINE + PCRE_NEWLINE_BITS |
| 150 | {$ENDIF} |
| 151 | {$IFDEF PCRE_7_7} |
| 152 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE + PCRE_JAVASCRIPT_COMPAT |
| 153 | {$ENDIF} |
| 154 | ; |
| 155 | |
| 156 | PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL + |
| 157 | PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL |
| 158 | {$IFDEF PCRE_7_0} |
| 159 | + PCRE_NEWLINE_BITS |
| 160 | {$ENDIF} |
| 161 | {$IFDEF PCRE_7_7} |
| 162 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE |
| 163 | {$ENDIF} |
| 164 | ; |
| 165 | |
| 166 | {$IFDEF PCRE_7_0} |
| 167 | PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL + |
| 168 | PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL + |
| 169 | PCRE_DFA_SHORTEST + PCRE_DFA_RESTART + |
| 170 | PCRE_NEWLINE_BITS |
| 171 | {$IFDEF PCRE_7_7} |
| 172 | + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE |
| 173 | {$ENDIF} |
| 174 | ; |
| 175 | {$ENDIF} |
| 176 | |
| 177 | { Exec-time and get/set-time error codes } |
| 178 | PCRE_ERROR_NOMATCH = -1; |
| 179 | PCRE_ERROR_NULL = -2; |
| 180 | PCRE_ERROR_BADOPTION = -3; |
| 181 | PCRE_ERROR_BADMAGIC = -4; |
| 182 | PCRE_ERROR_UNKNOWN_MODE = -5; |
| 183 | PCRE_ERROR_NOMEMORY = -6; |
| 184 | PCRE_ERROR_NOSUBSTRING = -7; |
| 185 | {$IFDEF PCRE_5_0} |
| 186 | PCRE_ERROR_MATCHLIMIT = -8; |
| 187 | PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself } |
| 188 | PCRE_ERROR_BADUTF8 = -10; |
| 189 | PCRE_ERROR_BADUTF8_OFFSET = -11; |
| 190 | PCRE_ERROR_PARTIAL = -12; |
| 191 | PCRE_ERROR_BADPARTIAL = -13; |
| 192 | PCRE_ERROR_INTERNAL = -14; |
| 193 | PCRE_ERROR_BADCOUNT = -15; |
| 194 | {$ENDIF} |
| 195 | {$IFDEF PCRE_7_0} |
| 196 | PCRE_ERROR_DFA_UITEM = -16; |
| 197 | PCRE_ERROR_DFA_UCOND = -17; |
| 198 | PCRE_ERROR_DFA_UMLIMIT = -18; |
| 199 | PCRE_ERROR_DFA_WSSIZE = -19; |
| 200 | PCRE_ERROR_DFA_RECURSE = -20; |
| 201 | PCRE_ERROR_RECURSIONLIMIT = -21; |
| 202 | PCRE_ERROR_NULLWSLIMIT = -22; |
| 203 | PCRE_ERROR_BADNEWLINE = -23; |
| 204 | {$ENDIF} |
| 205 | |
| 206 | { Request types for pcre_fullinfo() } |
| 207 | |
| 208 | PCRE_INFO_OPTIONS = 0; |
| 209 | PCRE_INFO_SIZE = 1; |
| 210 | PCRE_INFO_CAPTURECOUNT = 2; |
| 211 | PCRE_INFO_BACKREFMAX = 3; |
| 212 | PCRE_INFO_FIRSTBYTE = 4; |
| 213 | PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility } |
| 214 | PCRE_INFO_FIRSTTABLE = 5; |
| 215 | {$IFDEF PCRE_5_0} |
| 216 | PCRE_INFO_LASTLITERAL = 6; |
| 217 | PCRE_INFO_NAMEENTRYSIZE = 7; |
| 218 | PCRE_INFO_NAMECOUNT = 8; |
| 219 | PCRE_INFO_NAMETABLE = 9; |
| 220 | PCRE_INFO_STUDYSIZE = 10; |
| 221 | PCRE_INFO_DEFAULT_TABLES = 11; |
| 222 | {$ENDIF PCRE_5_0} |
| 223 | {$IFDEF PCRE_7_7} |
| 224 | PCRE_INFO_OKPARTIAL = 12; |
| 225 | PCRE_INFO_JCHANGED = 13; |
| 226 | PCRE_INFO_HASCRORLF = 14; |
| 227 | {$ENDIF} |
| 228 | |
| 229 | { Request types for pcre_config() } |
| 230 | {$IFDEF PCRE_5_0} |
| 231 | PCRE_CONFIG_UTF8 = 0; |
| 232 | PCRE_CONFIG_NEWLINE = 1; |
| 233 | PCRE_CONFIG_LINK_SIZE = 2; |
| 234 | PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3; |
| 235 | PCRE_CONFIG_MATCH_LIMIT = 4; |
| 236 | PCRE_CONFIG_STACKRECURSE = 5; |
| 237 | PCRE_CONFIG_UNICODE_PROPERTIES = 6; |
| 238 | {$ENDIF PCRE_5_0} |
| 239 | {$IFDEF PCRE_7_0} |
| 240 | PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7; |
| 241 | {$ENDIF} |
| 242 | {$IFDEF PCRE_7_7} |
| 243 | PCRE_CONFIG_BSR = 8; |
| 244 | {$ENDIF} |
| 245 | |
| 246 | { Bit flags for the pcre_extra structure } |
| 247 | {$IFDEF PCRE_5_0} |
| 248 | PCRE_EXTRA_STUDY_DATA = $0001; |
| 249 | PCRE_EXTRA_MATCH_LIMIT = $0002; |
| 250 | PCRE_EXTRA_CALLOUT_DATA = $0004; |
| 251 | PCRE_EXTRA_TABLES = $0008; |
| 252 | {$ENDIF PCRE_5_0} |
| 253 | {$IFDEF PCRE_7_0} |
| 254 | PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010; |
| 255 | {$ENDIF} |
| 256 | |
| 257 | Const |
| 258 | // DefaultOptions : integer = 0; |
| 259 | DefaultLocaleTable : pointer = nil; |
| 260 | |
| 261 | {$IFDEF PCRE_5_0} |
| 262 | { The structure for passing additional data to pcre_exec(). This is defined in |
| 263 | such as way as to be extensible. Always add new fields at the end, in order to |
| 264 | remain compatible. } |
| 265 | |
| 266 | type ppcre_extra = ^tpcre_extra; |
| 267 | tpcre_extra = record |
| 268 | flags : longint; { Bits for which fields are set } |
| 269 | study_data : pointer; { Opaque data from pcre_study() } |
| 270 | match_limit : longint; { Maximum number of calls to match() } |
| 271 | callout_data : pointer; { Data passed back in callouts } |
| 272 | tables : pointer; { Pointer to character tables } |
| 273 | match_limit_recursion: longint; { Max recursive calls to match() } |
| 274 | end; |
| 275 | |
| 276 | type ppcre_callout_block = ^pcre_callout_block; |
| 277 | pcre_callout_block = record |
| 278 | version, |
| 279 | (* ------------------------ Version 0 ------------------------------- *) |
| 280 | callout_number : integer; |
| 281 | offset_vector : pointer; |
| 282 | subject : pchar; |
| 283 | subject_length, start_match, current_position, capture_top, |
| 284 | capture_last : integer; |
| 285 | callout_data : pointer; |
| 286 | (* ------------------- Added for Version 1 -------------------------- *) |
| 287 | pattern_position, next_item_length : integer; |
| 288 | end; |
| 289 | {$ENDIF PCRE_5_0} |
| 290 | |
| 291 | {$OrgName+} |
| 292 | {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL} |
| 293 | |
| 294 | { local replacement of external pcre memory management functions } |
| 295 | function pcre_malloc( size : integer ) : pointer; |
| 296 | procedure pcre_free( {var} p : pointer ); |
| 297 | {$IFDEF PCRE_5_0} |
| 298 | const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc; |
| 299 | pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free; |
| 300 | function pcre_callout(var p : ppcre_callout_block) : integer; |
| 301 | {$ENDIF PCRE_5_0} |
| 302 | {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL} |
| 303 | |
| 304 | Implementation |
| 305 | |
| 306 | Uses strings, collect, messages, dnapp, commands, advance0, stringsx |
| 307 | {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL}; |
| 308 | |
| 309 | Const |
| 310 | MAGIC_NUMBER = $50435245; { 'PCRE' } |
| 311 | MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64} |
| 312 | |
| 313 | Type |
| 314 | PMatchArray = ^TMatchArray; |
| 315 | TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer; |
| 316 | |
| 317 | PRegExpCollection = ^TRegExpCollection; |
| 318 | TRegExpCollection = object(TSortedCollection) |
| 319 | MaxRegExp : integer; |
| 320 | SearchRegExp : shortstring; |
| 321 | CompareModeInsert : boolean; |
| 322 | constructor Init(AMaxRegExp:integer); |
| 323 | procedure FreeItem(P: Pointer); virtual; |
| 324 | function Compare(P1, P2: Pointer): Integer; virtual; |
| 325 | function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual; |
| 326 | function CheckNew(ARegExp:shortstring):PpcRegExp;virtual; |
| 327 | end; |
| 328 | |
| 329 | Var |
| 330 | PRegExpCache : PRegExpCollection; |
| 331 | |
| 332 | |
| 333 | {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL} |
| 334 | |
| 335 | { imported original pcre functions } |
| 336 | |
| 337 | function pcre_compile( const pattern : PChar; options : integer; |
| 338 | var errorptr : PChar; var erroroffset : integer; |
| 339 | const tables : PChar ) : pointer {pcre}; external; |
| 340 | {$IFDEF PCRE_7_0} |
| 341 | function pcre_compile2( const pattern : PChar; options : integer; |
| 342 | var errorcodeptr : Integer; |
| 343 | var errorptr : PChar; var erroroffset : integer; |
| 344 | const tables : PChar ) : pointer {pcre}; external; |
| 345 | {$ENDIF} |
| 346 | {$IFDEF PCRE_5_0} |
| 347 | function pcre_config( what : integer; where : pointer) : integer; external; |
| 348 | function pcre_copy_named_substring( const code : pointer {pcre}; |
| 349 | const subject : pchar; |
| 350 | var ovector : integer; |
| 351 | stringcount : integer; |
| 352 | const stringname : pchar; |
| 353 | var buffer : pchar; |
| 354 | size : integer) : integer; external; |
| 355 | function pcre_copy_substring( const subject : pchar; var ovector : integer; |
| 356 | stringcount, stringnumber : integer; |
| 357 | var buffer : pchar; size : integer ) |
| 358 | : integer; external; |
| 359 | function pcre_exec( const argument_re : pointer {pcre}; |
| 360 | const extra_data : pointer {pcre_extra}; |
| 361 | {$ELSE} |
| 362 | function pcre_exec( const external_re : pointer; |
| 363 | const external_extra : pointer; |
| 364 | {$ENDIF} |
| 365 | const subject : PChar; |
| 366 | length, start_offset, options : integer; |
| 367 | offsets : pointer; |
| 368 | offsetcount : integer ) : integer; external; |
| 369 | {$IFDEF PCRE_7_0} |
| 370 | function pcre_dfa_exec( const argument_re : pointer {pcre}; |
| 371 | const extra_data : pointer {pcre_extra}; |
| 372 | const subject : pchar; |
| 373 | length, start_offset, options : integer; |
| 374 | offsets : pointer; |
| 375 | offsetcount : integer; |
| 376 | workspace : pointer; |
| 377 | wscount : integer ) : integer; external; |
| 378 | {$ENDIF} |
| 379 | {$IFDEF PCRE_5_0} |
| 380 | procedure pcre_free_substring( const p : pchar ); external; |
| 381 | procedure pcre_free_substring_list( var p : pchar ); external; |
| 382 | function pcre_fullinfo( const argument_re : pointer {pcre}; |
| 383 | const extra_data : pointer {pcre_extra}; |
| 384 | what : integer; |
| 385 | where : pointer ) : integer; external; |
| 386 | function pcre_get_named_substring( const code : pointer {pcre}; |
| 387 | const subject : pchar; |
| 388 | var ovector : integer; |
| 389 | stringcount : integer; |
| 390 | const stringname : pchar; |
| 391 | var stringptr : pchar ) : integer; external; |
| 392 | function pcre_get_stringnumber( const code : pointer {pcre}; |
| 393 | const stringname : pchar ) : integer; external; |
| 394 | function pcre_get_stringtable_entries( const code : pointer {pcre}; |
| 395 | const stringname : pchar; |
| 396 | var firstptr, |
| 397 | lastptr : pchar ) : integer; external; |
| 398 | function pcre_get_substring( const subject : pchar; var ovector : integer; |
| 399 | stringcount, stringnumber : integer; |
| 400 | var stringptr : pchar ) : integer; external; |
| 401 | function pcre_get_substring_list( const subject : pchar; var ovector : integer; |
| 402 | stringcount : integer; |
| 403 | listptr : pointer {const char ***listptr}) : integer; external; |
| 404 | function pcre_info( const argument_re : pointer {pcre}; |
| 405 | var optptr : integer; |
| 406 | var first_byte : integer ) : integer; external; |
| 407 | function pcre_maketables : pchar; external; |
| 408 | {$ENDIF} |
| 409 | {$IFDEF PCRE_7_0} |
| 410 | function pcre_refcount( const argument_re : pointer {pcre}; |
| 411 | adjust : integer ) : pchar; external; |
| 412 | {$ENDIF} |
| 413 | function pcre_study( const external_re : pointer {pcre}; |
| 414 | options : integer; |
| 415 | var errorptr : PChar ) : pointer {pcre_extra}; external; |
| 416 | {$IFDEF PCRE_5_0} |
| 417 | function pcre_version : pchar; external; |
| 418 | {$ENDIF} |
| 419 | |
| 420 | function pcre_malloc( size : integer ) : pointer; |
| 421 | begin |
| 422 | GetMem( result, size ); |
| 423 | end; |
| 424 | |
| 425 | procedure pcre_free( {var} p : pointer ); |
| 426 | begin |
| 427 | if (p <> nil) then |
| 428 | FreeMem( p, 0 ); |
| 429 | {@p := nil;} |
| 430 | end; |
| 431 | |
| 432 | {$IFDEF PCRE_5_0} |
| 433 | (* Called from PCRE as a result of the (?C) item. We print out where we are in |
| 434 | the match. Yield zero unless more callouts than the fail count, or the callout |
| 435 | data is not zero. *) |
| 436 | |
| 437 | function pcre_callout; |
| 438 | begin |
| 439 | end; |
| 440 | {$ENDIF} |
| 441 | |
| 442 | {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL} |
| 443 | |
| 444 | // Always include the newest version of the library |
| 445 | {$IFDEF PCRE_7_7} |
| 446 | {$L pcre77.lib} |
| 447 | {$ELSE} |
| 448 | {$IFDEF PCRE_7_0} |
| 449 | {$L pcre70.lib} |
| 450 | {$ELSE} |
| 451 | {$IFDEF PCRE_5_0} |
| 452 | {$L pcre50.lib} |
| 453 | {$ELSE} |
| 454 | {$IFDEF PCRE_3_7} |
| 455 | {$L pcre37.lib} |
| 456 | {$ENDIF PCRE_3_7} |
| 457 | {$ENDIF PCRE_5_0} |
| 458 | {$ENDIF PCRE_7_0} |
| 459 | {$ENDIF PCRE_7_7} |
| 460 | |
| 461 | {TpcRegExp} |
| 462 | |
| 463 | constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer); |
| 464 | var |
| 465 | pRegExp : PChar; |
| 466 | begin |
| 467 | RegExp:=ARegExp; |
| 468 | RegExpC:=nil; |
| 469 | RegExpExt:=nil; |
| 470 | Matches:=nil; |
| 471 | MatchesCount:=0; |
| 472 | Error:=true; |
| 473 | ErrorMsg:=nil; |
| 474 | ErrorPos:=0; |
| 475 | RunTimeOptions := 0; |
| 476 | if length(RegExp) < 255 then |
| 477 | begin |
| 478 | RegExp[length(RegExp)+1]:=#0; |
| 479 | pRegExp:=@RegExp[1]; |
| 480 | end |
| 481 | else |
| 482 | begin |
| 483 | GetMem(pRegExp,length(RegExp)+1); |
| 484 | pRegExp:=strpcopy(pRegExp,RegExp); |
| 485 | end; |
| 486 | RegExpC := pcre_compile( pRegExp, |
| 487 | AOptions and PCRE_COMPILE_ALLOWED_OPTIONS, |
| 488 | ErrorMsg, ErrorPos, ALocale); |
| 489 | if length(RegExp) = 255 then |
| 490 | StrDispose(pRegExp); |
| 491 | if RegExpC = nil then |
| 492 | exit; |
| 493 | ErrorMsg:=nil; |
| 494 | RegExpExt := pcre_study( RegExpC, 0, ErrorMsg ); |
| 495 | if (RegExpExt = nil) and (ErrorMsg <> nil) then |
| 496 | begin |
| 497 | pcre_free(RegExpC); |
| 498 | exit; |
| 499 | end; |
| 500 | GetMem(Matches,SizeOf(TMatchArray)); |
| 501 | Error:=false; |
| 502 | end; |
| 503 | |
| 504 | destructor TpcRegExp.Done; |
| 505 | begin |
| 506 | if RegExpC <> nil then |
| 507 | pcre_free(RegExpC); |
| 508 | if RegExpExt <> nil then |
| 509 | pcre_free(RegExpExt); |
| 510 | if Matches <> nil then |
| 511 | FreeMem(Matches,SizeOf(TMatchArray)); |
| 512 | end; |
| 513 | |
| 514 | function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean; |
| 515 | var Options: Integer; |
| 516 | begin // must handle PCRE_ERROR_PARTIAL here |
| 517 | Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and |
| 518 | PCRE_EXEC_ALLOWED_OPTIONS; |
| 519 | if MatchesCount > 0 then |
| 520 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, PMatchArray(Matches)^[1], |
| 521 | Options, Matches, MAX_MATCHES ) else |
| 522 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, 0, |
| 523 | Options, Matches, MAX_MATCHES ); |
| 524 | { if MatchesCount = 0 then |
| 525 | MatchesCount := MatchesCount div 3;} |
| 526 | PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL; |
| 527 | SearchNext := MatchesCount > 0; |
| 528 | end; |
| 529 | |
| 530 | function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean; |
| 531 | begin |
| 532 | MatchesCount:=0; |
| 533 | Search:=SearchNext(AStr,ALen); |
| 534 | SourceLen:=ALen; |
| 535 | end; |
| 536 | |
| 537 | function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean; |
| 538 | var Options: Integer; |
| 539 | begin |
| 540 | MatchesCount:=0; |
| 541 | Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and |
| 542 | PCRE_EXEC_ALLOWED_OPTIONS; |
| 543 | MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs, |
| 544 | Options, Matches, MAX_MATCHES ); |
| 545 | PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL; |
| 546 | SearchOfs := MatchesCount > 0; |
| 547 | SourceLen := ALen-AOfs; |
| 548 | end; |
| 549 | |
| 550 | function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean; |
| 551 | begin |
| 552 | if (MatchesCount > 0) and (ANom <= (MatchesCount-1)) then |
| 553 | begin |
| 554 | ANom:=ANom*2; |
| 555 | Pos:=PMatchArray(Matches)^[ANom]; |
| 556 | Len:=PMatchArray(Matches)^[ANom+1]-Pos; |
| 557 | MatchSub:=true; |
| 558 | end |
| 559 | else |
| 560 | MatchSub:=false; |
| 561 | end; |
| 562 | |
| 563 | function TpcRegExp.MatchFull(var Pos,Len:longint):boolean; |
| 564 | begin |
| 565 | MatchFull:=MatchSub(0,Pos,Len); |
| 566 | end; |
| 567 | |
| 568 | function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string; |
| 569 | var |
| 570 | s: ansistring; |
| 571 | pos,len: longint; |
| 572 | begin |
| 573 | s:=''; |
| 574 | if MatchSub(ANom, pos, len) then |
| 575 | begin |
| 576 | setlength(s, len); |
| 577 | Move(AStr[pos], s[1], len); |
| 578 | end; |
| 579 | GetSubStr:=s; |
| 580 | end; |
| 581 | |
| 582 | function TpcRegExp.GetPreSubStr(AStr: Pchar):string; |
| 583 | var |
| 584 | s: ansistring; |
| 585 | l: longint; |
| 586 | begin |
| 587 | s:=''; |
| 588 | if (MatchesCount > 0) then |
| 589 | begin |
| 590 | l:=PMatchArray(Matches)^[0]-1; |
| 591 | if l > 0 then |
| 592 | begin |
| 593 | setlength(s,l); |
| 594 | Move(AStr[1],s[1],l); |
| 595 | end; |
| 596 | end; |
| 597 | GetPreSubStr:=s; |
| 598 | end; |
| 599 | |
| 600 | function TpcRegExp.GetPostSubStr(AStr: Pchar):string; |
| 601 | var |
| 602 | s: ansistring; |
| 603 | l: longint; |
| 604 | ANom: integer; |
| 605 | begin |
| 606 | s:=''; |
| 607 | if (MatchesCount > 0) then |
| 608 | begin |
| 609 | ANom:=(MatchesCount-1){*2} shl 1; |
| 610 | l:=SourceLen-PMatchArray(Matches)^[ANom+1]+1; |
| 611 | if l > 0 then |
| 612 | begin |
| 613 | setlength(s,l); |
| 614 | Move(AStr[PMatchArray(Matches)^[ANom+1]],s[1],l); |
| 615 | end; |
| 616 | end; |
| 617 | GetPostSubStr:=s; |
| 618 | end; |
| 619 | |
| 620 | |
| 621 | function TpcRegExp.GetFullStr(AStr: Pchar):string; |
| 622 | var |
| 623 | s: ansistring; |
| 624 | l: longint; |
| 625 | begin |
| 626 | GetFullStr:=GetSubStr(0,AStr); |
| 627 | end; |
| 628 | |
| 629 | function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string; |
| 630 | var |
| 631 | s: ansistring; |
| 632 | l,i,lasti: longint; |
| 633 | begin |
| 634 | l:=length(ARepl); |
| 635 | i:=1; |
| 636 | lasti:=1; |
| 637 | s:=''; |
| 638 | while i <= l do |
| 639 | begin |
| 640 | case ARepl[i] of |
| 641 | '\' : |
| 642 | begin |
| 643 | if i < l then |
| 644 | begin |
| 645 | s:=s+copy(ARepl,lasti,i-lasti){+ARepl[i+1]}; |
| 646 | {AH 17-10-05 support for POSIX \1-\9 backreferences} |
| 647 | case ARepl[i+1] of |
| 648 | '0' : s:=s+GetFullStr(AStr); |
| 649 | '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr); |
| 650 | else s:=s+ARepl[i+1]; // copy the escaped character |
| 651 | end; |
| 652 | end; |
| 653 | inc(i); |
| 654 | lasti:=i+1; |
| 655 | end; |
| 656 | '$' : |
| 657 | begin |
| 658 | if i < l then |
| 659 | begin |
| 660 | s:=s+copy(ARepl,lasti,i-lasti); |
| 661 | case ARepl[i+1] of |
| 662 | '&' : s:=s+GetFullStr(AStr); |
| 663 | '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr); |
| 664 | '`' : s:=s+GetPreSubStr(AStr); |
| 665 | #39 : s:=s+GetPostSubStr(AStr); |
| 666 | end; |
| 667 | end; |
| 668 | inc(i); |
| 669 | lasti:=i+1; |
| 670 | end; |
| 671 | end; |
| 672 | inc(i); |
| 673 | end; |
| 674 | if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then |
| 675 | s:=s+copy(ARepl,lasti,l-lasti+1); |
| 676 | GetReplStr:=s; |
| 677 | end; |
| 678 | |
| 679 | function TpcRegExp.ErrorStr:string; |
| 680 | begin |
| 681 | ErrorStr:=StrPas(ErrorMsg); |
| 682 | end; |
| 683 | |
| 684 | {TRegExpCollection} |
| 685 | |
| 686 | constructor TRegExpCollection.Init(AMaxRegExp: integer); |
| 687 | begin |
| 688 | Inherited Init(1,1); |
| 689 | MaxRegExp:=AMaxRegExp; |
| 690 | CompareModeInsert:=true; |
| 691 | end; |
| 692 | |
| 693 | procedure TRegExpCollection.FreeItem(P: Pointer); |
| 694 | begin |
| 695 | if P <> nil then |
| 696 | begin |
| 697 | Dispose(PpcRegExp(P),Done); |
| 698 | end; |
| 699 | end; |
| 700 | |
| 701 | function TRegExpCollection.Compare(P1, P2: Pointer): Integer; |
| 702 | //var |
| 703 | // l,l1,l2,i : byte; |
| 704 | //// wPos: pchar; |
| 705 | begin |
| 706 | if CompareModeInsert then |
| 707 | begin |
| 708 | // l1:=length(PpcRegExp(P1)^.RegExp); |
| 709 | // l2:=length(PpcRegExp(P2)^.RegExp); |
| 710 | // if l1 > l2 then l:=l2 else |
| 711 | // l:=l1; |
| 712 | // for i:=1 to l do |
| 713 | // if PpcRegExp(P1).RegExp[i] <> PpcRegExp(P2).RegExp[i] then break; |
| 714 | // if i <=l then |
| 715 | // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(PpcRegExp(P2).RegExp[i]) else |
| 716 | // Compare:=l1-l2; |
| 717 | Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, PpcRegExp(P2).RegExp, False); |
| 718 | end |
| 719 | else |
| 720 | begin |
| 721 | // l1:=length(PpcRegExp(P1)^.RegExp); |
| 722 | // l2:=length(SearchRegExp); |
| 723 | // if l1 > l2 then l:=l2 else |
| 724 | // l:=l1; |
| 725 | // for i:=1 to l do |
| 726 | // if PpcRegExp(P1).RegExp[i] <> SearchRegExp[i] then |
| 727 | // begin |
| 728 | // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(SearchRegExp[i]); |
| 729 | // break; |
| 730 | // end; |
| 731 | // if i > l then Compare:=l1-l2; |
| 732 | Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, SearchRegExp, False); |
| 733 | end; |
| 734 | end; |
| 735 | |
| 736 | function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean; |
| 737 | var I : integer; |
| 738 | begin |
| 739 | CompareModeInsert:=false; |
| 740 | SearchRegExp:=ARegExp; |
| 741 | if Search(nil,I) then |
| 742 | begin |
| 743 | P:=PpcRegExp(At(I)); |
| 744 | Find:=true; |
| 745 | end |
| 746 | else |
| 747 | begin |
| 748 | P:=nil; |
| 749 | Find:=false; |
| 750 | end; |
| 751 | CompareModeInsert:=true; |
| 752 | end; |
| 753 | |
| 754 | function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp; |
| 755 | var |
| 756 | P : PpcRegExp; |
| 757 | begin |
| 758 | if not Find(ARegExp,P) then |
| 759 | begin |
| 760 | if Count = MaxRegExp then |
| 761 | AtFree(0); |
| 762 | P:=New(ppcRegExp,Init(ARegExp,PCRE_CASELESS,nil)); |
| 763 | Insert(P); |
| 764 | end; |
| 765 | CheckNew:=P; |
| 766 | end; |
| 767 | |
| 768 | function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean; |
| 769 | var |
| 770 | PpcRE:PpcRegExp; |
| 771 | begin |
| 772 | PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale)); |
| 773 | pcGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr)); |
| 774 | Dispose(PpcRE,Done); |
| 775 | end; |
| 776 | |
| 777 | function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string; |
| 778 | var |
| 779 | PpcRE:PpcRegExp; |
| 780 | begin |
| 781 | PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale)); |
| 782 | if PpcRE^.Search(pchar(AStr),Length(AStr)) then |
| 783 | pcGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl) |
| 784 | else |
| 785 | pcGrepSub:=''; |
| 786 | Dispose(PpcRE,Done); |
| 787 | end; |
| 788 | |
| 789 | function pcFastGrepMatch(WildCard, aStr: string): Boolean; |
| 790 | var |
| 791 | PpcRE:PpcRegExp; |
| 792 | begin |
| 793 | PpcRE:=PRegExpCache^.CheckNew(WildCard); |
| 794 | pcFastGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr)); |
| 795 | end; |
| 796 | |
| 797 | function pcFastGrepSub(WildCard, aStr, aRepl: string): string; |
| 798 | var |
| 799 | PpcRE:PpcRegExp; |
| 800 | begin |
| 801 | PpcRE:=PRegExpCache^.CheckNew(WildCard); |
| 802 | if PpcRE^.Search(pchar(AStr),Length(AStr)) then |
| 803 | pcFastGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl) |
| 804 | else |
| 805 | pcFastGrepSub:=''; |
| 806 | end; |
| 807 | |
| 808 | {$IFDEF PCRE_5_0} |
| 809 | function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none} |
| 810 | asm |
| 811 | call pcre_version |
| 812 | end; |
| 813 | {$ENDIF PCRE_5_0} |
| 814 | |
| 815 | function pcError; |
| 816 | var P: ppcRegExp absolute pRegExp; |
| 817 | begin |
| 818 | Result := (P = nil) or P^.Error; |
| 819 | If Result and (P <> nil) then |
| 820 | begin |
| 821 | { if P^.ErrorPos = 0 then |
| 822 | MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"', nil,mfConfirmation+mfOkButton) |
| 823 | else} |
| 824 | MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos), |
| 825 | @P^.ErrorPos,mfConfirmation+mfOkButton); |
| 826 | Dispose(P, Done); |
| 827 | P:=nil; |
| 828 | end; |
| 829 | end; |
| 830 | |
| 831 | function pcInit; |
| 832 | var Options : Integer; |
| 833 | begin |
| 834 | If CaseSens then Options := 0 else Options := PCRE_CASELESS; |
| 835 | Result := New( PpcRegExp, Init( Pattern, |
| 836 | {DefaultOptions} |
| 837 | startup.MiscMultiData.cfgRegEx.DefaultOptions or Options, |
| 838 | DefaultLocaleTable) ); |
| 839 | end; |
| 840 | |
| 841 | Initialization |
| 842 | PRegExpCache:=New(PRegExpCollection,Init(64)); |
| 843 | Finalization |
| 844 | Dispose(PRegExpCache,Done); |
| 845 | End. |