mirror of
https://github.com/alliedmodders/amxmodx.git
synced 2025-01-12 23:08:03 +03:00
Regex: Update PCRE to v8.35.
I was über lazy at first, so took libs from SM. But actually it's quite easy to compile, so let's update to latest version \o/.
This commit is contained in:
parent
d1153b8049
commit
d4de0e6f1e
@ -108,9 +108,9 @@ while (<STDIN>)
|
|||||||
|
|
||||||
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
||||||
# the next line is a non literal text line; similarly, if not inside a
|
# the next line is a non literal text line; similarly, if not inside a
|
||||||
# literal section, do nothing if a literal follows. The point being that
|
# literal section, do nothing if a literal follows, unless we are inside
|
||||||
# the <pre> and </pre> that delimit literal sections will do the spacing.
|
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
|
||||||
# Always skip if no previous output.
|
# literal sections will do the spacing. Always skip if no previous output.
|
||||||
|
|
||||||
elsif (/^\.sp/)
|
elsif (/^\.sp/)
|
||||||
{
|
{
|
||||||
@ -123,7 +123,7 @@ while (<STDIN>)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
|
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
|
||||||
}
|
}
|
||||||
redo; # Now process the lookahead line we just read
|
redo; # Now process the lookahead line we just read
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
|||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2012 University of Cambridge
|
Copyright (c) 1997-2014 University of Cambridge
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2012 Zoltan Herczeg
|
Copyright(c) 2010-2014 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2012 Zoltan Herczeg
|
Copyright(c) 2009-2014 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,6 +60,11 @@
|
|||||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||||
# 2012-09-08 ChPe added PCRE32 support
|
# 2012-09-08 ChPe added PCRE32 support
|
||||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||||
|
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||||
|
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||||
|
# so it has been removed.
|
||||||
|
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||||
|
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||||
|
|
||||||
PROJECT(PCRE C CXX)
|
PROJECT(PCRE C CXX)
|
||||||
|
|
||||||
@ -128,6 +133,9 @@ SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
|
|||||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||||
|
|
||||||
|
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||||
|
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
||||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
@ -164,9 +172,6 @@ SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
|||||||
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
|
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||||
"Enable Valgrind support.")
|
"Enable Valgrind support.")
|
||||||
|
|
||||||
SET(PCRE_SUPPORT_COVERAGE OFF CACHE BOOL
|
|
||||||
"Enable code coverage support using gcov.")
|
|
||||||
|
|
||||||
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
||||||
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
||||||
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
||||||
@ -181,6 +186,12 @@ IF (MINGW)
|
|||||||
OFF)
|
OFF)
|
||||||
ENDIF(MINGW)
|
ENDIF(MINGW)
|
||||||
|
|
||||||
|
IF(MSVC)
|
||||||
|
OPTION(INSTALL_MSVC_PDB
|
||||||
|
"ON=Install .pdb files built by MSVC, if generated"
|
||||||
|
OFF)
|
||||||
|
ENDIF(MSVC)
|
||||||
|
|
||||||
# bzip2 lib
|
# bzip2 lib
|
||||||
IF(BZIP2_FOUND)
|
IF(BZIP2_FOUND)
|
||||||
OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
|
OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
|
||||||
@ -296,13 +307,6 @@ IF(PCRE_SUPPORT_VALGRIND)
|
|||||||
SET(SUPPORT_VALGRIND 1)
|
SET(SUPPORT_VALGRIND 1)
|
||||||
ENDIF(PCRE_SUPPORT_VALGRIND)
|
ENDIF(PCRE_SUPPORT_VALGRIND)
|
||||||
|
|
||||||
IF(PCRE_SUPPORT_COVERAGE)
|
|
||||||
SET(SUPPORT_GCOV 1)
|
|
||||||
IF(NOT CMAKE_COMPILER_IS_GNUCC)
|
|
||||||
MESSAGE(FATAL_ERROR "Code coverage reports can only be generated when using GCC")
|
|
||||||
ENDIF(NOT CMAKE_COMPILER_IS_GNUCC)
|
|
||||||
ENDIF(PCRE_SUPPORT_COVERAGE)
|
|
||||||
|
|
||||||
# This next one used to contain
|
# This next one used to contain
|
||||||
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
||||||
# but I was advised to add the NCURSES test as well, along with
|
# but I was advised to add the NCURSES test as well, along with
|
||||||
@ -552,6 +556,17 @@ SET(PCREPOSIX_SOURCES
|
|||||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
IF(MSVC AND NOT PCRE_STATIC)
|
||||||
|
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||||
|
SET(PCRE_SOURCES
|
||||||
|
${PCRE_SOURCES} pcre.rc)
|
||||||
|
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||||
|
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||||
|
SET(PCREPOSIX_SOURCES
|
||||||
|
${PCREPOSIX_SOURCES} pcreposix.rc)
|
||||||
|
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||||
|
ENDIF(MSVC AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
SET(PCRECPP_HEADERS
|
SET(PCRECPP_HEADERS
|
||||||
pcrecpp.h
|
pcrecpp.h
|
||||||
pcre_scanner.h
|
pcre_scanner.h
|
||||||
@ -570,7 +585,7 @@ SET(PCRECPP_SOURCES
|
|||||||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
||||||
|
|
||||||
IF(MSVC)
|
IF(MSVC)
|
||||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE)
|
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
|
||||||
ENDIF(MSVC)
|
ENDIF(MSVC)
|
||||||
|
|
||||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||||
@ -772,7 +787,7 @@ MESSAGE(\" \")
|
|||||||
# This is a generated file.
|
# This is a generated file.
|
||||||
srcdir=${PROJECT_SOURCE_DIR}
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
pcretest=${PCRETEST_EXE}
|
pcretest=${PCRETEST_EXE}
|
||||||
source ${PROJECT_SOURCE_DIR}/RunTest
|
. ${PROJECT_SOURCE_DIR}/RunTest
|
||||||
if test \"$?\" != \"0\"; then exit 1; fi
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
# End
|
# End
|
||||||
")
|
")
|
||||||
@ -788,7 +803,7 @@ if test \"$?\" != \"0\"; then exit 1; fi
|
|||||||
srcdir=${PROJECT_SOURCE_DIR}
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
pcregrep=${PCREGREP_EXE}
|
pcregrep=${PCREGREP_EXE}
|
||||||
pcretest=${PCRETEST_EXE}
|
pcretest=${PCRETEST_EXE}
|
||||||
source ${PROJECT_SOURCE_DIR}/RunGrepTest
|
. ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||||
if test \"$?\" != \"0\"; then exit 1; fi
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
# End
|
# End
|
||||||
")
|
")
|
||||||
@ -877,6 +892,17 @@ INSTALL(FILES ${man1} DESTINATION man/man1)
|
|||||||
INSTALL(FILES ${man3} DESTINATION man/man3)
|
INSTALL(FILES ${man3} DESTINATION man/man3)
|
||||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
||||||
|
|
||||||
|
IF(MSVC AND INSTALL_MSVC_PDB)
|
||||||
|
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre.pdb
|
||||||
|
${PROJECT_BINARY_DIR}/pcreposix.pdb
|
||||||
|
DESTINATION bin
|
||||||
|
CONFIGURATIONS RelWithDebInfo)
|
||||||
|
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcred.pdb
|
||||||
|
${PROJECT_BINARY_DIR}/pcreposixd.pdb
|
||||||
|
DESTINATION bin
|
||||||
|
CONFIGURATIONS Debug)
|
||||||
|
ENDIF(MSVC AND INSTALL_MSVC_PDB)
|
||||||
|
|
||||||
# help, only for nice output
|
# help, only for nice output
|
||||||
IF(BUILD_SHARED_LIBS)
|
IF(BUILD_SHARED_LIBS)
|
||||||
SET(BUILD_STATIC_LIBS OFF)
|
SET(BUILD_STATIC_LIBS OFF)
|
||||||
@ -917,6 +943,7 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
||||||
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
||||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
||||||
|
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
|
||||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
||||||
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
||||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||||
@ -953,6 +980,11 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||||
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
||||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||||
|
|
||||||
|
IF(MSVC)
|
||||||
|
MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
|
||||||
|
ENDIF(MSVC)
|
||||||
|
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
ENDIF(PCRE_SHOW_REPORT)
|
ENDIF(PCRE_SHOW_REPORT)
|
||||||
|
|
||||||
|
@ -1,6 +1,501 @@
|
|||||||
ChangeLog for PCRE
|
ChangeLog for PCRE
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
Version 8.35 04-April-2014
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
1. A new flag is set, when property checks are present in an XCLASS.
|
||||||
|
When this flag is not set, PCRE can perform certain optimizations
|
||||||
|
such as studying these XCLASS-es.
|
||||||
|
|
||||||
|
2. The auto-possessification of character sets was improved: a normal
|
||||||
|
and an extended character set can be compared now. Furthermore
|
||||||
|
the JIT compiler optimizes more character set checks.
|
||||||
|
|
||||||
|
3. Got rid of some compiler warnings for potentially uninitialized variables
|
||||||
|
that show up only when compiled with -O2.
|
||||||
|
|
||||||
|
4. A pattern such as (?=ab\K) that uses \K in an assertion can set the start
|
||||||
|
of a match later than the end of the match. The pcretest program was not
|
||||||
|
handling the case sensibly - it was outputting from the start to the next
|
||||||
|
binary zero. It now reports this situation in a message, and outputs the
|
||||||
|
text from the end to the start.
|
||||||
|
|
||||||
|
5. Fast forward search is improved in JIT. Instead of the first three
|
||||||
|
characters, any three characters with fixed position can be searched.
|
||||||
|
Search order: first, last, middle.
|
||||||
|
|
||||||
|
6. Improve character range checks in JIT. Characters are read by an imprecise
|
||||||
|
function now, which returns with an unknown value if the character code is
|
||||||
|
above a certain threshold (e.g. 256). The only limitation is that the value
|
||||||
|
must be bigger than the threshold as well. This function is useful, when
|
||||||
|
the characters above the threshold are handled in the same way.
|
||||||
|
|
||||||
|
7. The macros whose names start with RAWUCHAR are placeholders for a future
|
||||||
|
mode in which only the bottom 21 bits of 32-bit data items are used. To
|
||||||
|
make this more memorable for those maintaining the code, the names have
|
||||||
|
been changed to start with UCHAR21, and an extensive comment has been added
|
||||||
|
to their definition.
|
||||||
|
|
||||||
|
8. Add missing (new) files sljitNativeTILEGX.c and sljitNativeTILEGX-encoder.c
|
||||||
|
to the export list in Makefile.am (they were accidentally omitted from the
|
||||||
|
8.34 tarball).
|
||||||
|
|
||||||
|
9. The informational output from pcretest used the phrase "starting byte set"
|
||||||
|
which is inappropriate for the 16-bit and 32-bit libraries. As the output
|
||||||
|
for "first char" and "need char" really means "non-UTF-char", I've changed
|
||||||
|
"byte" to "char", and slightly reworded the output. The documentation about
|
||||||
|
these values has also been (I hope) clarified.
|
||||||
|
|
||||||
|
10. Another JIT related optimization: use table jumps for selecting the correct
|
||||||
|
backtracking path, when more than four alternatives are present inside a
|
||||||
|
bracket.
|
||||||
|
|
||||||
|
11. Empty match is not possible, when the minimum length is greater than zero,
|
||||||
|
and there is no \K in the pattern. JIT should avoid empty match checks in
|
||||||
|
such cases.
|
||||||
|
|
||||||
|
12. In a caseless character class with UCP support, when a character with more
|
||||||
|
than one alternative case was not the first character of a range, not all
|
||||||
|
the alternative cases were added to the class. For example, s and \x{17f}
|
||||||
|
are both alternative cases for S: the class [RST] was handled correctly,
|
||||||
|
but [R-T] was not.
|
||||||
|
|
||||||
|
13. The configure.ac file always checked for pthread support when JIT was
|
||||||
|
enabled. This is not used in Windows, so I have put this test inside a
|
||||||
|
check for the presence of windows.h (which was already tested for).
|
||||||
|
|
||||||
|
14. Improve pattern prefix search by a simplified Boyer-Moore algorithm in JIT.
|
||||||
|
The algorithm provides a way to skip certain starting offsets, and is usually
|
||||||
|
faster than linear prefix searches.
|
||||||
|
|
||||||
|
15. Change 13 for 8.20 updated RunTest to check for the 'fr' locale as well
|
||||||
|
as for 'fr_FR' and 'french'. For some reason, however, it then used the
|
||||||
|
Windows-specific input and output files, which have 'french' screwed in.
|
||||||
|
So this could never have worked. One of the problems with locales is that
|
||||||
|
they aren't always the same. I have now updated RunTest so that it checks
|
||||||
|
the output of the locale test (test 3) against three different output
|
||||||
|
files, and it allows the test to pass if any one of them matches. With luck
|
||||||
|
this should make the test pass on some versions of Solaris where it was
|
||||||
|
failing. Because of the uncertainty, the script did not use to stop if
|
||||||
|
test 3 failed; it now does. If further versions of a French locale ever
|
||||||
|
come to light, they can now easily be added.
|
||||||
|
|
||||||
|
16. If --with-pcregrep-bufsize was given a non-integer value such as "50K",
|
||||||
|
there was a message during ./configure, but it did not stop. This now
|
||||||
|
provokes an error. The invalid example in README has been corrected.
|
||||||
|
If a value less than the minimum is given, the minimum value has always
|
||||||
|
been used, but now a warning is given.
|
||||||
|
|
||||||
|
17. If --enable-bsr-anycrlf was set, the special 16/32-bit test failed. This
|
||||||
|
was a bug in the test system, which is now fixed. Also, the list of various
|
||||||
|
configurations that are tested for each release did not have one with both
|
||||||
|
16/32 bits and --enable-bsr-anycrlf. It now does.
|
||||||
|
|
||||||
|
18. pcretest was missing "-C bsr" for displaying the \R default setting.
|
||||||
|
|
||||||
|
19. Little endian PowerPC systems are supported now by the JIT compiler.
|
||||||
|
|
||||||
|
20. The fast forward newline mechanism could enter an infinite loop on
|
||||||
|
certain invalid UTF-8 input. Although we don't support these cases
|
||||||
|
this issue can be fixed by a performance optimization.
|
||||||
|
|
||||||
|
21. Change 33 of 8.34 is not sufficient to ensure stack safety because it does
|
||||||
|
not take account of existing stack usage. There is now a new global
|
||||||
|
variable called pcre_stack_guard that can be set to point to an external
|
||||||
|
function to check stack availability. It is called at the start of
|
||||||
|
processing every parenthesized group.
|
||||||
|
|
||||||
|
22. A typo in the code meant that in ungreedy mode the max/min qualifier
|
||||||
|
behaved like a min-possessive qualifier, and, for example, /a{1,3}b/U did
|
||||||
|
not match "ab".
|
||||||
|
|
||||||
|
23. When UTF was disabled, the JIT program reported some incorrect compile
|
||||||
|
errors. These messages are silenced now.
|
||||||
|
|
||||||
|
24. Experimental support for ARM-64 and MIPS-64 has been added to the JIT
|
||||||
|
compiler.
|
||||||
|
|
||||||
|
25. Change all the temporary files used in RunGrepTest to be different to those
|
||||||
|
used by RunTest so that the tests can be run simultaneously, for example by
|
||||||
|
"make -j check".
|
||||||
|
|
||||||
|
|
||||||
|
Version 8.34 15-December-2013
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
1. Add pcre[16|32]_jit_free_unused_memory to forcibly free unused JIT
|
||||||
|
executable memory. Patch inspired by Carsten Klein.
|
||||||
|
|
||||||
|
2. ./configure --enable-coverage defined SUPPORT_GCOV in config.h, although
|
||||||
|
this macro is never tested and has no effect, because the work to support
|
||||||
|
coverage involves only compiling and linking options and special targets in
|
||||||
|
the Makefile. The comment in config.h implied that defining the macro would
|
||||||
|
enable coverage support, which is totally false. There was also support for
|
||||||
|
setting this macro in the CMake files (my fault, I just copied it from
|
||||||
|
configure). SUPPORT_GCOV has now been removed.
|
||||||
|
|
||||||
|
3. Make a small performance improvement in strlen16() and strlen32() in
|
||||||
|
pcretest.
|
||||||
|
|
||||||
|
4. Change 36 for 8.33 left some unreachable statements in pcre_exec.c,
|
||||||
|
detected by the Solaris compiler (gcc doesn't seem to be able to diagnose
|
||||||
|
these cases). There was also one in pcretest.c.
|
||||||
|
|
||||||
|
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
|
||||||
|
|
||||||
|
6. In UTF mode, the code for checking whether a group could match an empty
|
||||||
|
string (which is used for indefinitely repeated groups to allow for
|
||||||
|
breaking an infinite loop) was broken when the group contained a repeated
|
||||||
|
negated single-character class with a character that occupied more than one
|
||||||
|
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
|
||||||
|
UTF-8 mode). The effect was undefined: the group might or might not be
|
||||||
|
deemed as matching an empty string, or the program might have crashed.
|
||||||
|
|
||||||
|
7. The code for checking whether a group could match an empty string was not
|
||||||
|
recognizing that \h, \H, \v, \V, and \R must match a character.
|
||||||
|
|
||||||
|
8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||||
|
an empty string. If it can, pcretest shows this in its information output.
|
||||||
|
|
||||||
|
9. Fixed two related bugs that applied to Unicode extended grapheme clusters
|
||||||
|
that were repeated with a maximizing qualifier (e.g. \X* or \X{2,5}) when
|
||||||
|
matched by pcre_exec() without using JIT:
|
||||||
|
|
||||||
|
(a) If the rest of the pattern did not match after a maximal run of
|
||||||
|
grapheme clusters, the code for backing up to try with fewer of them
|
||||||
|
did not always back up over a full grapheme when characters that do not
|
||||||
|
have the modifier quality were involved, e.g. Hangul syllables.
|
||||||
|
|
||||||
|
(b) If the match point in a subject started with a modifier character, and
|
||||||
|
there was no match, the code could incorrectly back up beyond the match
|
||||||
|
point, and potentially beyond the first character in the subject,
|
||||||
|
leading to a segfault or an incorrect match result.
|
||||||
|
|
||||||
|
10. A conditional group with an assertion condition could lead to PCRE
|
||||||
|
recording an incorrect first data item for a match if no other first data
|
||||||
|
item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
|
||||||
|
first data item, and therefore matched "ca" after "c" instead of at the
|
||||||
|
start.
|
||||||
|
|
||||||
|
11. Change 40 for 8.33 (allowing pcregrep to find empty strings) showed up a
|
||||||
|
bug that caused the command "echo a | ./pcregrep -M '|a'" to loop.
|
||||||
|
|
||||||
|
12. The source of pcregrep now includes z/OS-specific code so that it can be
|
||||||
|
compiled for z/OS as part of the special z/OS distribution.
|
||||||
|
|
||||||
|
13. Added the -T and -TM options to pcretest.
|
||||||
|
|
||||||
|
14. The code in pcre_compile.c for creating the table of named capturing groups
|
||||||
|
has been refactored. Instead of creating the table dynamically during the
|
||||||
|
actual compiling pass, the information is remembered during the pre-compile
|
||||||
|
pass (on the stack unless there are more than 20 named groups, in which
|
||||||
|
case malloc() is used) and the whole table is created before the actual
|
||||||
|
compile happens. This has simplified the code (it is now nearly 150 lines
|
||||||
|
shorter) and prepared the way for better handling of references to groups
|
||||||
|
with duplicate names.
|
||||||
|
|
||||||
|
15. A back reference to a named subpattern when there is more than one of the
|
||||||
|
same name now checks them in the order in which they appear in the pattern.
|
||||||
|
The first one that is set is used for the reference. Previously only the
|
||||||
|
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||||
|
|
||||||
|
16. Unicode character properties were updated from Unicode 6.3.0.
|
||||||
|
|
||||||
|
17. The compile-time code for auto-possessification has been refactored, based
|
||||||
|
on a patch by Zoltan Herczeg. It now happens after instead of during
|
||||||
|
compilation. The code is cleaner, and more cases are handled. The option
|
||||||
|
PCRE_NO_AUTO_POSSESS is added for testing purposes, and the -O and /O
|
||||||
|
options in pcretest are provided to set it. It can also be set by
|
||||||
|
(*NO_AUTO_POSSESS) at the start of a pattern.
|
||||||
|
|
||||||
|
18. The character VT has been added to the default ("C" locale) set of
|
||||||
|
characters that match \s and are generally treated as white space,
|
||||||
|
following this same change in Perl 5.18. There is now no difference between
|
||||||
|
"Perl space" and "POSIX space". Whether VT is treated as white space in
|
||||||
|
other locales depends on the locale.
|
||||||
|
|
||||||
|
19. The code for checking named groups as conditions, either for being set or
|
||||||
|
for being recursed, has been refactored (this is related to 14 and 15
|
||||||
|
above). Processing unduplicated named groups should now be as fast as
|
||||||
|
numerical groups, and processing duplicated groups should be faster than
|
||||||
|
before.
|
||||||
|
|
||||||
|
20. Two patches to the CMake build system, by Alexander Barkov:
|
||||||
|
|
||||||
|
(1) Replace the "source" command by "." in CMakeLists.txt because
|
||||||
|
"source" is a bash-ism.
|
||||||
|
|
||||||
|
(2) Add missing HAVE_STDINT_H and HAVE_INTTYPES_H to config-cmake.h.in;
|
||||||
|
without these the CMake build does not work on Solaris.
|
||||||
|
|
||||||
|
21. Perl has changed its handling of \8 and \9. If there is no previously
|
||||||
|
encountered capturing group of those numbers, they are treated as the
|
||||||
|
literal characters 8 and 9 instead of a binary zero followed by the
|
||||||
|
literals. PCRE now does the same.
|
||||||
|
|
||||||
|
22. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||||
|
possible to specify values greater than 0777 and also making them
|
||||||
|
unambiguous.
|
||||||
|
|
||||||
|
23. Perl now gives an error for missing closing braces after \x{... instead of
|
||||||
|
treating the string as literal. PCRE now does the same.
|
||||||
|
|
||||||
|
24. RunTest used to grumble if an inappropriate test was selected explicitly,
|
||||||
|
but just skip it when running all tests. This made it awkward to run ranges
|
||||||
|
of tests when one of them was inappropriate. Now it just skips any
|
||||||
|
inappropriate tests, as it always did when running all tests.
|
||||||
|
|
||||||
|
25. If PCRE_AUTO_CALLOUT and PCRE_UCP were set for a pattern that contained
|
||||||
|
character types such as \d or \w, too many callouts were inserted, and the
|
||||||
|
data that they returned was rubbish.
|
||||||
|
|
||||||
|
26. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||||
|
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||||
|
were matched by \h. The code has now been refactored so that the lists of
|
||||||
|
the horizontal and vertical whitespace characters used for \h and \v (which
|
||||||
|
are defined only in one place) are now also used for \s.
|
||||||
|
|
||||||
|
27. Add JIT support for the 64 bit TileGX architecture.
|
||||||
|
Patch by Jiong Wang (Tilera Corporation).
|
||||||
|
|
||||||
|
28. Possessive quantifiers for classes (both explicit and automatically
|
||||||
|
generated) now use special opcodes instead of wrapping in ONCE brackets.
|
||||||
|
|
||||||
|
29. Whereas an item such as A{4}+ ignored the possessiveness of the quantifier
|
||||||
|
(because it's meaningless), this was not happening when PCRE_CASELESS was
|
||||||
|
set. Not wrong, but inefficient.
|
||||||
|
|
||||||
|
30. Updated perltest.pl to add /u (force Unicode mode) when /W (use Unicode
|
||||||
|
properties for \w, \d, etc) is present in a test regex. Otherwise if the
|
||||||
|
test contains no characters greater than 255, Perl doesn't realise it
|
||||||
|
should be using Unicode semantics.
|
||||||
|
|
||||||
|
31. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||||
|
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||||
|
does in Unicode mode.
|
||||||
|
|
||||||
|
32. Added the "forbid" facility to pcretest so that putting tests into the
|
||||||
|
wrong test files can sometimes be quickly detected.
|
||||||
|
|
||||||
|
33. There is now a limit (default 250) on the depth of nesting of parentheses.
|
||||||
|
This limit is imposed to control the amount of system stack used at compile
|
||||||
|
time. It can be changed at build time by --with-parens-nest-limit=xxx or
|
||||||
|
the equivalent in CMake.
|
||||||
|
|
||||||
|
34. Character classes such as [A-\d] or [a-[:digit:]] now cause compile-time
|
||||||
|
errors. Perl warns for these when in warning mode, but PCRE has no facility
|
||||||
|
for giving warnings.
|
||||||
|
|
||||||
|
35. Change 34 for 8.13 allowed quantifiers on assertions, because Perl does.
|
||||||
|
However, this was not working for (?!) because it is optimized to (*FAIL),
|
||||||
|
for which PCRE does not allow quantifiers. The optimization is now disabled
|
||||||
|
when a quantifier follows (?!). I can't see any use for this, but it makes
|
||||||
|
things uniform.
|
||||||
|
|
||||||
|
36. Perl no longer allows group names to start with digits, so I have made this
|
||||||
|
change also in PCRE. It simplifies the code a bit.
|
||||||
|
|
||||||
|
37. In extended mode, Perl ignores spaces before a + that indicates a
|
||||||
|
possessive quantifier. PCRE allowed a space before the quantifier, but not
|
||||||
|
before the possessive +. It now does.
|
||||||
|
|
||||||
|
38. The use of \K (reset reported match start) within a repeated possessive
|
||||||
|
group such as (a\Kb)*+ was not working.
|
||||||
|
|
||||||
|
40. Document that the same character tables must be used at compile time and
|
||||||
|
run time, and that the facility to pass tables to pcre_exec() and
|
||||||
|
pcre_dfa_exec() is for use only with saved/restored patterns.
|
||||||
|
|
||||||
|
41. Applied Jeff Trawick's patch CMakeLists.txt, which "provides two new
|
||||||
|
features for Builds with MSVC:
|
||||||
|
|
||||||
|
1. Support pcre.rc and/or pcreposix.rc (as is already done for MinGW
|
||||||
|
builds). The .rc files can be used to set FileDescription and many other
|
||||||
|
attributes.
|
||||||
|
|
||||||
|
2. Add an option (-DINSTALL_MSVC_PDB) to enable installation of .pdb files.
|
||||||
|
This allows higher-level build scripts which want .pdb files to avoid
|
||||||
|
hard-coding the exact files needed."
|
||||||
|
|
||||||
|
42. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||||
|
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||||
|
|
||||||
|
43. A minimizing repeat of a class containing codepoints greater than 255 in
|
||||||
|
non-UTF 16-bit or 32-bit modes caused an internal error when PCRE was
|
||||||
|
compiled to use the heap for recursion.
|
||||||
|
|
||||||
|
44. Got rid of some compiler warnings for unused variables when UTF but not UCP
|
||||||
|
is configured.
|
||||||
|
|
||||||
|
|
||||||
|
Version 8.33 28-May-2013
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
1. Added 'U' to some constants that are compared to unsigned integers, to
|
||||||
|
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
|
||||||
|
variables that are added to signed variables, to ensure the result is
|
||||||
|
signed and can be negated.
|
||||||
|
|
||||||
|
2. Applied patch by Daniel Richard G for quashing MSVC warnings to the
|
||||||
|
CMake config files.
|
||||||
|
|
||||||
|
3. Revise the creation of config.h.generic so that all boolean macros are
|
||||||
|
#undefined, whereas non-boolean macros are #ifndef/#endif-ed. This makes
|
||||||
|
overriding via -D on the command line possible.
|
||||||
|
|
||||||
|
4. Changing the definition of the variable "op" in pcre_exec.c from pcre_uchar
|
||||||
|
to unsigned int is reported to make a quite noticeable speed difference in
|
||||||
|
a specific Windows environment. Testing on Linux did also appear to show
|
||||||
|
some benefit (and it is clearly not harmful). Also fixed the definition of
|
||||||
|
Xop which should be unsigned.
|
||||||
|
|
||||||
|
5. Related to (4), changing the definition of the intermediate variable cc
|
||||||
|
in repeated character loops from pcre_uchar to pcre_uint32 also gave speed
|
||||||
|
improvements.
|
||||||
|
|
||||||
|
6. Fix forward search in JIT when link size is 3 or greater. Also removed some
|
||||||
|
unnecessary spaces.
|
||||||
|
|
||||||
|
7. Adjust autogen.sh and configure.ac to lose warnings given by automake 1.12
|
||||||
|
and later.
|
||||||
|
|
||||||
|
8. Fix two buffer over read issues in 16 and 32 bit modes. Affects JIT only.
|
||||||
|
|
||||||
|
9. Optimizing fast_forward_start_bits in JIT.
|
||||||
|
|
||||||
|
10. Adding support for callouts in JIT, and fixing some issues revealed
|
||||||
|
during this work. Namely:
|
||||||
|
|
||||||
|
(a) Unoptimized capturing brackets incorrectly reset on backtrack.
|
||||||
|
|
||||||
|
(b) Minimum length was not checked before the matching is started.
|
||||||
|
|
||||||
|
11. The value of capture_last that is passed to callouts was incorrect in some
|
||||||
|
cases when there was a capture on one path that was subsequently abandoned
|
||||||
|
after a backtrack. Also, the capture_last value is now reset after a
|
||||||
|
recursion, since all captures are also reset in this case.
|
||||||
|
|
||||||
|
12. The interpreter no longer returns the "too many substrings" error in the
|
||||||
|
case when an overflowing capture is in a branch that is subsequently
|
||||||
|
abandoned after a backtrack.
|
||||||
|
|
||||||
|
13. In the pathological case when an offset vector of size 2 is used, pcretest
|
||||||
|
now prints out the matched string after a yield of 0 or 1.
|
||||||
|
|
||||||
|
14. Inlining subpatterns in recursions, when certain conditions are fulfilled.
|
||||||
|
Only supported by the JIT compiler at the moment.
|
||||||
|
|
||||||
|
15. JIT compiler now supports 32 bit Macs thanks to Lawrence Velazquez.
|
||||||
|
|
||||||
|
16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the
|
||||||
|
offset of the starting point of the matching process, provided the offsets
|
||||||
|
vector is large enough.
|
||||||
|
|
||||||
|
17. The \A escape now records a lookbehind value of 1, though its execution
|
||||||
|
does not actually inspect the previous character. This is to ensure that,
|
||||||
|
in partial multi-segment matching, at least one character from the old
|
||||||
|
segment is retained when a new segment is processed. Otherwise, if there
|
||||||
|
are no lookbehinds in the pattern, \A might match incorrectly at the start
|
||||||
|
of a new segment.
|
||||||
|
|
||||||
|
18. Added some #ifdef __VMS code into pcretest.c to help VMS implementations.
|
||||||
|
|
||||||
|
19. Redefined some pcre_uchar variables in pcre_exec.c as pcre_uint32; this
|
||||||
|
gives some modest performance improvement in 8-bit mode.
|
||||||
|
|
||||||
|
20. Added the PCRE-specific property \p{Xuc} for matching characters that can
|
||||||
|
be expressed in certain programming languages using Universal Character
|
||||||
|
Names.
|
||||||
|
|
||||||
|
21. Unicode validation has been updated in the light of Unicode Corrigendum #9,
|
||||||
|
which points out that "non characters" are not "characters that may not
|
||||||
|
appear in Unicode strings" but rather "characters that are reserved for
|
||||||
|
internal use and have only local meaning".
|
||||||
|
|
||||||
|
22. When a pattern was compiled with automatic callouts (PCRE_AUTO_CALLOUT) and
|
||||||
|
there was a conditional group that depended on an assertion, if the
|
||||||
|
assertion was false, the callout that immediately followed the alternation
|
||||||
|
in the condition was skipped when pcre_exec() was used for matching.
|
||||||
|
|
||||||
|
23. Allow an explicit callout to be inserted before an assertion that is the
|
||||||
|
condition for a conditional group, for compatibility with automatic
|
||||||
|
callouts, which always insert a callout at this point.
|
||||||
|
|
||||||
|
24. In 8.31, (*COMMIT) was confined to within a recursive subpattern. Perl also
|
||||||
|
confines (*SKIP) and (*PRUNE) in the same way, and this has now been done.
|
||||||
|
|
||||||
|
25. (*PRUNE) is now supported by the JIT compiler.
|
||||||
|
|
||||||
|
26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa.
|
||||||
|
|
||||||
|
27. Fix the case where there are two or more SKIPs with arguments that may be
|
||||||
|
ignored.
|
||||||
|
|
||||||
|
28. (*SKIP) is now supported by the JIT compiler.
|
||||||
|
|
||||||
|
29. (*THEN) is now supported by the JIT compiler.
|
||||||
|
|
||||||
|
30. Update RunTest with additional test selector options.
|
||||||
|
|
||||||
|
31. The way PCRE handles backtracking verbs has been changed in two ways.
|
||||||
|
|
||||||
|
(1) Previously, in something like (*COMMIT)(*SKIP), COMMIT would override
|
||||||
|
SKIP. Now, PCRE acts on whichever backtracking verb is reached first by
|
||||||
|
backtracking. In some cases this makes it more Perl-compatible, but Perl's
|
||||||
|
rather obscure rules do not always do the same thing.
|
||||||
|
|
||||||
|
(2) Previously, backtracking verbs were confined within assertions. This is
|
||||||
|
no longer the case for positive assertions, except for (*ACCEPT). Again,
|
||||||
|
this sometimes improves Perl compatibility, and sometimes does not.
|
||||||
|
|
||||||
|
32. A number of tests that were in test 2 because Perl did things differently
|
||||||
|
have been moved to test 1, because either Perl or PCRE has changed, and
|
||||||
|
these tests are now compatible.
|
||||||
|
|
||||||
|
32. Backtracking control verbs are now handled in the same way in JIT and
|
||||||
|
interpreter.
|
||||||
|
|
||||||
|
33. An opening parenthesis in a MARK/PRUNE/SKIP/THEN name in a pattern that
|
||||||
|
contained a forward subroutine reference caused a compile error.
|
||||||
|
|
||||||
|
34. Auto-detect and optimize limited repetitions in JIT.
|
||||||
|
|
||||||
|
35. Implement PCRE_NEVER_UTF to lock out the use of UTF, in particular,
|
||||||
|
blocking (*UTF) etc.
|
||||||
|
|
||||||
|
36. In the interpreter, maximizing pattern repetitions for characters and
|
||||||
|
character types now use tail recursion, which reduces stack usage.
|
||||||
|
|
||||||
|
37. The value of the max lookbehind was not correctly preserved if a compiled
|
||||||
|
and saved regex was reloaded on a host of different endianness.
|
||||||
|
|
||||||
|
38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION). As part of the extension
|
||||||
|
of the compiled pattern block, expand the flags field from 16 to 32 bits
|
||||||
|
because it was almost full.
|
||||||
|
|
||||||
|
39. Try madvise first before posix_madvise.
|
||||||
|
|
||||||
|
40. Change 7 for PCRE 7.9 made it impossible for pcregrep to find empty lines
|
||||||
|
with a pattern such as ^$. It has taken 4 years for anybody to notice! The
|
||||||
|
original change locked out all matches of empty strings. This has been
|
||||||
|
changed so that one match of an empty string per line is recognized.
|
||||||
|
Subsequent searches on the same line (for colouring or for --only-matching,
|
||||||
|
for example) do not recognize empty strings.
|
||||||
|
|
||||||
|
41. Applied a user patch to fix a number of spelling mistakes in comments.
|
||||||
|
|
||||||
|
42. Data lines longer than 65536 caused pcretest to crash.
|
||||||
|
|
||||||
|
43. Clarified the data type for length and startoffset arguments for pcre_exec
|
||||||
|
and pcre_dfa_exec in the function-specific man pages, where they were
|
||||||
|
explicitly stated to be in bytes, never having been updated. I also added
|
||||||
|
some clarification to the pcreapi man page.
|
||||||
|
|
||||||
|
44. A call to pcre_dfa_exec() with an output vector size less than 2 caused
|
||||||
|
a segmentation fault.
|
||||||
|
|
||||||
|
|
||||||
Version 8.32 30-November-2012
|
Version 8.32 30-November-2012
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
@ -1508,7 +2003,8 @@ Version 7.9 11-Apr-09
|
|||||||
7. A pattern that could match an empty string could cause pcregrep to loop; it
|
7. A pattern that could match an empty string could cause pcregrep to loop; it
|
||||||
doesn't make sense to accept an empty string match in pcregrep, so I have
|
doesn't make sense to accept an empty string match in pcregrep, so I have
|
||||||
locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
|
locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
|
||||||
seems to be how GNU grep behaves.
|
seems to be how GNU grep behaves. [But see later change 40 for release
|
||||||
|
8.33.]
|
||||||
|
|
||||||
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
|
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
|
||||||
start or after a newline", because the conditional assertion was not being
|
start or after a newline", because the conditional assertion was not being
|
||||||
@ -1751,7 +2247,7 @@ Version 7.7 07-May-08
|
|||||||
containing () gave an internal compiling error instead of "reference to
|
containing () gave an internal compiling error instead of "reference to
|
||||||
non-existent subpattern". Fortunately, when the pattern did exist, the
|
non-existent subpattern". Fortunately, when the pattern did exist, the
|
||||||
compiled code was correct. (When scanning forwards to check for the
|
compiled code was correct. (When scanning forwards to check for the
|
||||||
existencd of the subpattern, it was treating the data ']' as terminating
|
existence of the subpattern, it was treating the data ']' as terminating
|
||||||
the class, so got the count wrong. When actually compiling, the reference
|
the class, so got the count wrong. When actually compiling, the reference
|
||||||
was subsequently set up correctly.)
|
was subsequently set up correctly.)
|
||||||
|
|
||||||
|
@ -29,9 +29,9 @@ while (scalar(@ARGV) > 0)
|
|||||||
^\.TH\s\S|
|
^\.TH\s\S|
|
||||||
^\.SH\s\S|
|
^\.SH\s\S|
|
||||||
^\.SS\s\S|
|
^\.SS\s\S|
|
||||||
^\.TP(?:\s\d+)?\s*$|
|
^\.TP(?:\s?\d+)?\s*$|
|
||||||
^\.ti\s\S|
|
|
||||||
^\.SM\s*$|
|
^\.SM\s*$|
|
||||||
|
^\.br\s*$|
|
||||||
^\.rs\s*$|
|
^\.rs\s*$|
|
||||||
^\.sp\s*$|
|
^\.sp\s*$|
|
||||||
^\.nf\s*$|
|
^\.nf\s*$|
|
||||||
|
@ -54,12 +54,12 @@ Support for 16-bit and 32-bit data strings
|
|||||||
|
|
||||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
|
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||||
libraries. In the description that follows, the word "short" is
|
different libraries. In the description that follows, the word "short" is used
|
||||||
used for a 16-bit data quantity, and the word "unit" is used for a quantity
|
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||||
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
|
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||||
integer in 32-bit mode. However, so as not to over-complicate the text, the
|
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||||
names of PCRE functions are given in 8-bit form only.
|
given in 8-bit form only.
|
||||||
|
|
||||||
|
|
||||||
Computing the memory requirement: how it was
|
Computing the memory requirement: how it was
|
||||||
@ -94,6 +94,11 @@ runs more slowly than before (30% or more, depending on the pattern) because it
|
|||||||
is doing a full analysis of the pattern. My hope was that this would not be a
|
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||||
big issue, and in the event, nobody has commented on it.
|
big issue, and in the event, nobody has commented on it.
|
||||||
|
|
||||||
|
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||||
|
(default 250, settable at build time) so as to put a limit on the amount of
|
||||||
|
system stack used by pcre_compile(). This is a safety feature for environments
|
||||||
|
with small stacks where the patterns are provided by users.
|
||||||
|
|
||||||
|
|
||||||
Traditional matching function
|
Traditional matching function
|
||||||
-----------------------------
|
-----------------------------
|
||||||
@ -122,27 +127,28 @@ same way. See the user documentation for details.
|
|||||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||||
because it may have a number of states active at one time. More work would be
|
because it may have a number of states active at one time. More work would be
|
||||||
needed at compile time to produce a traditional FSM where only one state is
|
needed at compile time to produce a traditional FSM where only one state is
|
||||||
ever active at once. I believe some other regex matchers work this way.
|
ever active at once. I believe some other regex matchers work this way. JIT
|
||||||
|
support is not available for this kind of matching.
|
||||||
|
|
||||||
|
|
||||||
Changeable options
|
Changeable options
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
|
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||||
change in the middle of patterns. From PCRE 8.13, their processing is handled
|
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||||
entirely at compile time by generating different opcodes for the different
|
is handled entirely at compile time by generating different opcodes for the
|
||||||
settings. The runtime functions do not need to keep track of an options state
|
different settings. The runtime functions do not need to keep track of an
|
||||||
any more.
|
options state any more.
|
||||||
|
|
||||||
|
|
||||||
Format of compiled patterns
|
Format of compiled patterns
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
|
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||||
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
|
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||||
items of variable length. The first unit in an item contains an opcode, and
|
variable length. The first unit in an item contains an opcode, and the length
|
||||||
the length of the item is either implicit in the opcode or contained in the
|
of the item is either implicit in the opcode or contained in the data that
|
||||||
data that follows it.
|
follows it.
|
||||||
|
|
||||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||||
@ -151,8 +157,10 @@ default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
|||||||
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
||||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||||
than 64K are going to be processed. In this description, we assume the "normal"
|
than 64K are going to be processed. In this description, we assume the "normal"
|
||||||
compilation options. Data values that are counts (e.g. for quantifiers) are
|
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||||
always just two bytes long (one short in 16-bit mode).
|
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||||
|
and 32-bit modes.
|
||||||
|
|
||||||
|
|
||||||
Opcodes with no following data
|
Opcodes with no following data
|
||||||
------------------------------
|
------------------------------
|
||||||
@ -162,7 +170,7 @@ These items are all just one unit long
|
|||||||
OP_END end of pattern
|
OP_END end of pattern
|
||||||
OP_ANY match any one character other than newline
|
OP_ANY match any one character other than newline
|
||||||
OP_ALLANY match any one character, including newline
|
OP_ALLANY match any one character, including newline
|
||||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||||
OP_SOD match start of data: \A
|
OP_SOD match start of data: \A
|
||||||
OP_SOM, start of match (subject + offset): \G
|
OP_SOM, start of match (subject + offset): \G
|
||||||
OP_SET_SOM, set start of match (\K)
|
OP_SET_SOM, set start of match (\K)
|
||||||
@ -180,28 +188,33 @@ These items are all just one unit long
|
|||||||
OP_VSPACE \v
|
OP_VSPACE \v
|
||||||
OP_NOT_WORDCHAR \W
|
OP_NOT_WORDCHAR \W
|
||||||
OP_WORDCHAR \w
|
OP_WORDCHAR \w
|
||||||
OP_EODN match end of data or \n at end: \Z
|
OP_EODN match end of data or newline at end: \Z
|
||||||
OP_EOD match end of data: \z
|
OP_EOD match end of data: \z
|
||||||
OP_DOLL $ (end of data, or before final newline)
|
OP_DOLL $ (end of data, or before final newline)
|
||||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||||
OP_EXTUNI match an extended Unicode character
|
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||||
OP_ANYNL match any Unicode newline sequence
|
OP_ANYNL match any Unicode newline sequence
|
||||||
|
|
||||||
|
OP_ASSERT_ACCEPT )
|
||||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||||
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
|
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||||
OP_SKIP ) indicating which parentheses must be closed.
|
OP_SKIP ) indicates which parentheses must be closed.
|
||||||
|
OP_THEN )
|
||||||
|
|
||||||
|
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||||
|
This ends the assertion, not the entire pattern match.
|
||||||
|
|
||||||
|
|
||||||
Backtracking control verbs with (optional) data
|
Backtracking control verbs with optional data
|
||||||
-----------------------------------------------
|
---------------------------------------------
|
||||||
|
|
||||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||||
following in the same format.
|
following in the same format as OP_MARK.
|
||||||
|
|
||||||
|
|
||||||
Matching literal characters
|
Matching literal characters
|
||||||
@ -212,6 +225,10 @@ casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
|||||||
the character may be more than one unit long. In UTF-32 mode, characters
|
the character may be more than one unit long. In UTF-32 mode, characters
|
||||||
are always exactly one unit long.
|
are always exactly one unit long.
|
||||||
|
|
||||||
|
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||||
|
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||||
|
for something like [^a]).
|
||||||
|
|
||||||
|
|
||||||
Repeating single characters
|
Repeating single characters
|
||||||
---------------------------
|
---------------------------
|
||||||
@ -232,10 +249,9 @@ following opcodes, which come in caseful and caseless versions:
|
|||||||
|
|
||||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||||
UTF-32 mode these are one-unit items.
|
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||||
Those with "MIN" in their names are the minimizing versions. Those with "POS"
|
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||||
in their names are possessive versions. Other repeats make use of these
|
Other repeats make use of these opcodes:
|
||||||
opcodes:
|
|
||||||
|
|
||||||
Caseful Caseless
|
Caseful Caseless
|
||||||
OP_UPTO OP_UPTOI
|
OP_UPTO OP_UPTOI
|
||||||
@ -243,10 +259,15 @@ opcodes:
|
|||||||
OP_POSUPTO OP_POSUPTOI
|
OP_POSUPTO OP_POSUPTOI
|
||||||
OP_EXACT OP_EXACTI
|
OP_EXACT OP_EXACTI
|
||||||
|
|
||||||
Each of these is followed by a two-byte (one short) count (most significant
|
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||||
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
|
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||||
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
|
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||||
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
|
OP_POSUPTO).
|
||||||
|
|
||||||
|
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||||
|
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||||
|
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||||
|
positive single-character classes.
|
||||||
|
|
||||||
|
|
||||||
Repeating character types
|
Repeating character types
|
||||||
@ -277,7 +298,10 @@ Match by Unicode property
|
|||||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||||
character by testing its Unicode property (the \p and \P escape sequences).
|
character by testing its Unicode property (the \p and \P escape sequences).
|
||||||
Each is followed by two units that encode the desired property as a type and a
|
Each is followed by two units that encode the desired property as a type and a
|
||||||
value.
|
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||||
|
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||||
|
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||||
|
PT_SC (Script).
|
||||||
|
|
||||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||||
@ -287,67 +311,88 @@ value.
|
|||||||
Character classes
|
Character classes
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
|
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||||
something like [^a]).
|
something like [^a]).
|
||||||
|
|
||||||
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
|
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||||
repeated, negated, single-character classes. The normal single-character
|
negated, single-character classes. The normal single-character opcodes
|
||||||
opcodes (OP_STAR, etc.) are used for repeated positive single-character
|
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||||
classes.
|
|
||||||
|
|
||||||
When there is more than one character in a class and all the characters are
|
When there is more than one character in a class, and all the code points are
|
||||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||||
negative one. In either case, the opcode is followed by a 32-byte (16-short)
|
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||||
bit map containing a 1 bit for every character that is acceptable. The bits are
|
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||||
counted from the least significant end of each unit. In caseless mode, bits for
|
bits are counted from the least significant end of each unit. In caseless mode,
|
||||||
both cases are set.
|
bits for both cases are set.
|
||||||
|
|
||||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
|
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||||
subject characters with values greater than 255 can be handled correctly. For
|
mode, subject characters with values greater than 255 can be handled correctly.
|
||||||
OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||||
|
|
||||||
For classes containing characters with values greater than 255, OP_XCLASS is
|
For classes containing characters with values greater than 255 or that contain
|
||||||
used. It optionally uses a bit map (if any characters lie within it), followed
|
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||||
by a list of pairs (for a range) and single characters. In caseless mode, both
|
are less than 256, followed by a list of pairs (for a range) and single
|
||||||
cases are explicitly listed. There is a flag character than indicates whether
|
characters. In caseless mode, both cases are explicitly listed.
|
||||||
it is a positive or a negative class.
|
|
||||||
|
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||||
|
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||||
|
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||||
|
coded as follows:
|
||||||
|
|
||||||
|
XCL_END marks the end of the list
|
||||||
|
XCL_SINGLE one character follows
|
||||||
|
XCL_RANGE two characters follow
|
||||||
|
XCL_PROP a Unicode property (type, value) follows
|
||||||
|
XCL_NOTPROP a Unicode property (type, value) follows
|
||||||
|
|
||||||
|
If a range starts with a code point less than 256 and ends with one greater
|
||||||
|
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||||
|
This means that if no other items in the class set bits in the map, a map is
|
||||||
|
not needed.
|
||||||
|
|
||||||
|
|
||||||
Back references
|
Back references
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
|
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||||
containing the reference number.
|
reference number if the reference is to a unique capturing group (either by
|
||||||
|
number or by name). When named groups are used, there may be more than one
|
||||||
|
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||||
|
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||||
|
in the group name table of the first entry for the required name, followed by
|
||||||
|
the number of groups with the same name.
|
||||||
|
|
||||||
|
|
||||||
Repeating character classes and back references
|
Repeating character classes and back references
|
||||||
-----------------------------------------------
|
-----------------------------------------------
|
||||||
|
|
||||||
Single-character classes are handled specially (see above). This section
|
Single-character classes are handled specially (see above). This section
|
||||||
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
|
applies to other classes and also to back references. In both cases, the repeat
|
||||||
follows the base item. The matching code looks at the following opcode to see
|
information follows the base item. The matching code looks at the following
|
||||||
if it is one of
|
opcode to see if it is one of
|
||||||
|
|
||||||
OP_CRSTAR
|
OP_CRSTAR
|
||||||
OP_CRMINSTAR
|
OP_CRMINSTAR
|
||||||
|
OP_CRPOSSTAR
|
||||||
OP_CRPLUS
|
OP_CRPLUS
|
||||||
OP_CRMINPLUS
|
OP_CRMINPLUS
|
||||||
|
OP_CRPOSPLUS
|
||||||
OP_CRQUERY
|
OP_CRQUERY
|
||||||
OP_CRMINQUERY
|
OP_CRMINQUERY
|
||||||
|
OP_CRPOSQUERY
|
||||||
OP_CRRANGE
|
OP_CRRANGE
|
||||||
OP_CRMINRANGE
|
OP_CRMINRANGE
|
||||||
|
OP_CRPOSRANGE
|
||||||
|
|
||||||
All but the last two are just single-unit items. The others are followed by
|
All but the last three are single-unit items, with no data. The others are
|
||||||
four bytes (two shorts) of data, comprising the minimum and maximum repeat
|
followed by the minimum and maximum repeat counts.
|
||||||
counts. There are no special possessive opcodes for these repeats; a possessive
|
|
||||||
repeat is compiled into an atomic group.
|
|
||||||
|
|
||||||
|
|
||||||
Brackets and alternation
|
Brackets and alternation
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
A pair of non-capturing round brackets is wrapped round each expression at
|
||||||
compile time, so alternation always happens in the context of brackets.
|
compile time, so alternation always happens in the context of brackets.
|
||||||
|
|
||||||
[Note for North Americans: "bracket" to some English speakers, including
|
[Note for North Americans: "bracket" to some English speakers, including
|
||||||
@ -364,13 +409,13 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
|||||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||||
number immediately follows the offset, always as a 2-byte (one short) item.
|
number is a count that immediately follows the offset.
|
||||||
|
|
||||||
OP_KET is used for subpatterns that do not repeat indefinitely, and
|
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||||
maximally respectively (see below for possessive repetitions). All three are
|
respectively (see below for possessive repetitions). All three are followed by
|
||||||
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
|
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||||
the matching bracket opcode.
|
bracket opcode.
|
||||||
|
|
||||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||||
@ -397,6 +442,7 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
|||||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||||
OP_KETRMAX, and if so, to break the loop.
|
OP_KETRMAX, and if so, to break the loop.
|
||||||
|
|
||||||
|
|
||||||
Possessive brackets
|
Possessive brackets
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
@ -407,26 +453,34 @@ of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
|||||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||||
|
|
||||||
|
|
||||||
|
Once-only (atomic) groups
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
These are just like other subpatterns, but they start with the opcode
|
||||||
|
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||||
|
within the atomic group; the latter when there are. The distinction is needed
|
||||||
|
for when there is a backtrack to before the group - any captures within the
|
||||||
|
group must be reset, so it is necessary to retain backtracking points inside
|
||||||
|
the group even after it is complete in order to do this. When there are no
|
||||||
|
captures in an atomic group, all the backtracking can be discarded when it is
|
||||||
|
complete. This is more efficient, and also uses less stack.
|
||||||
|
|
||||||
|
The check for matching an empty string in an unbounded repeat is handled
|
||||||
|
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
----------
|
----------
|
||||||
|
|
||||||
Forward assertions are just like other subpatterns, but starting with one of
|
Forward assertions are also just like other subpatterns, but starting with one
|
||||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||||
is OP_REVERSE, followed by a two byte (one short) count of the number of
|
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||||
characters to move back the pointer in the subject string. In ASCII mode, the
|
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||||
count is a number of units, but in UTF-8/16 mode each character may occupy more
|
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||||
than one unit; in UTF-32 mode each character occupies exactly one unit.
|
mode each character occupies exactly one unit. A separate count is present in
|
||||||
A separate count is present in each alternative of a lookbehind
|
each alternative of a lookbehind assertion, allowing them to have different
|
||||||
assertion, allowing them to have different fixed lengths.
|
fixed lengths.
|
||||||
|
|
||||||
|
|
||||||
Once-only (atomic) subpatterns
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
These are also just like other subpatterns, but they start with the opcode
|
|
||||||
OP_ONCE. The check for matching an empty string in an unbounded repeat is
|
|
||||||
handled entirely at runtime, so there is just this one opcode.
|
|
||||||
|
|
||||||
|
|
||||||
Conditional subpatterns
|
Conditional subpatterns
|
||||||
@ -435,28 +489,29 @@ Conditional subpatterns
|
|||||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||||
the condition is a back reference, this is stored at the start of the
|
the condition is a back reference, this is stored at the start of the
|
||||||
subpattern using the opcode OP_CREF followed by two bytes (one short)
|
subpattern using the opcode OP_CREF followed by a count containing the
|
||||||
containing the reference number. OP_NCREF is used instead if the reference was
|
reference number, provided that the reference is to a unique capturing group.
|
||||||
generated by name (so that the runtime code knows to check for duplicate
|
If the reference was by name and there is more than one group with that name,
|
||||||
names).
|
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||||
|
names table, and the number of groups with the same name.
|
||||||
|
|
||||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||||
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
|
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||||
zero for "the whole pattern". For a DEFINE condition, just the single unit
|
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||||
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
|
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||||
always starts with one of the assertions.
|
conditional subpattern always starts with one of the assertions.
|
||||||
|
|
||||||
|
|
||||||
Recursion
|
Recursion
|
||||||
---------
|
---------
|
||||||
|
|
||||||
Recursion either matches the current regex, or some subexpression. The opcode
|
Recursion either matches the current regex, or some subexpression. The opcode
|
||||||
OP_RECURSE is followed by an value which is the offset to the starting bracket
|
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||||
are not strictly a recursion.
|
not strictly a recursion.
|
||||||
|
|
||||||
|
|
||||||
Callout
|
Callout
|
||||||
@ -464,10 +519,10 @@ Callout
|
|||||||
|
|
||||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||||
cases there follows a two-byte (one short) value giving the offset in the
|
cases there follows a count giving the offset in the pattern string to the
|
||||||
pattern to the start of the following item, and another two-byte (one short)
|
start of the following item, and another count giving the length of this item.
|
||||||
item giving the length of the next item.
|
These values make it possible for pcretest to output useful tracing information
|
||||||
|
using automatic callouts.
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
February 2012
|
November 2013
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
Installation Instructions
|
Installation Instructions
|
||||||
*************************
|
*************************
|
||||||
|
|
||||||
Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
|
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
|
||||||
Inc.
|
Inc.
|
||||||
|
|
||||||
Copying and distribution of this file, with or without modification,
|
Copying and distribution of this file, with or without modification,
|
||||||
@ -12,8 +12,8 @@ without warranty of any kind.
|
|||||||
Basic Installation
|
Basic Installation
|
||||||
==================
|
==================
|
||||||
|
|
||||||
Briefly, the shell commands `./configure; make; make install' should
|
Briefly, the shell command `./configure && make && make install'
|
||||||
configure, build, and install this package. The following
|
should configure, build, and install this package. The following
|
||||||
more-detailed instructions are generic; see the `README' file for
|
more-detailed instructions are generic; see the `README' file for
|
||||||
instructions specific to this package. Some packages provide this
|
instructions specific to this package. Some packages provide this
|
||||||
`INSTALL' file but do not implement all of the features documented
|
`INSTALL' file but do not implement all of the features documented
|
||||||
@ -309,9 +309,10 @@ causes the specified `gcc' to be used as the C compiler (unless it is
|
|||||||
overridden in the site shell script).
|
overridden in the site shell script).
|
||||||
|
|
||||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||||
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
an Autoconf limitation. Until the limitation is lifted, you can use
|
||||||
|
this workaround:
|
||||||
|
|
||||||
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||||
|
|
||||||
`configure' Invocation
|
`configure' Invocation
|
||||||
======================
|
======================
|
||||||
@ -367,4 +368,3 @@ operates.
|
|||||||
|
|
||||||
`configure' also accepts some other, not widely useful, options. Run
|
`configure' also accepts some other, not widely useful, options. Run
|
||||||
`configure --help' for more details.
|
`configure --help' for more details.
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ Email domain: cam.ac.uk
|
|||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2012 University of Cambridge
|
Copyright (c) 1997-2014 University of Cambridge
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2012 Zoltan Herczeg
|
Copyright(c) 2010-2014 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -46,7 +46,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2012 Zoltan Herczeg
|
Copyright(c) 2009-2014 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -14,11 +14,16 @@ dist_doc_DATA = \
|
|||||||
NEWS \
|
NEWS \
|
||||||
README
|
README
|
||||||
|
|
||||||
|
# Note that pcrecpp.html is not in this list; it is listed separately below.
|
||||||
|
|
||||||
dist_html_DATA = \
|
dist_html_DATA = \
|
||||||
|
doc/html/NON-AUTOTOOLS-BUILD.txt \
|
||||||
|
doc/html/README.txt \
|
||||||
doc/html/index.html \
|
doc/html/index.html \
|
||||||
|
doc/html/pcre-config.html \
|
||||||
doc/html/pcre.html \
|
doc/html/pcre.html \
|
||||||
doc/html/pcre16.html \
|
doc/html/pcre16.html \
|
||||||
doc/html/pcre-config.html \
|
doc/html/pcre32.html \
|
||||||
doc/html/pcre_assign_jit_stack.html \
|
doc/html/pcre_assign_jit_stack.html \
|
||||||
doc/html/pcre_compile.html \
|
doc/html/pcre_compile.html \
|
||||||
doc/html/pcre_compile2.html \
|
doc/html/pcre_compile2.html \
|
||||||
@ -44,6 +49,7 @@ dist_html_DATA = \
|
|||||||
doc/html/pcre_refcount.html \
|
doc/html/pcre_refcount.html \
|
||||||
doc/html/pcre_study.html \
|
doc/html/pcre_study.html \
|
||||||
doc/html/pcre_utf16_to_host_byte_order.html \
|
doc/html/pcre_utf16_to_host_byte_order.html \
|
||||||
|
doc/html/pcre_utf32_to_host_byte_order.html \
|
||||||
doc/html/pcre_version.html \
|
doc/html/pcre_version.html \
|
||||||
doc/html/pcreapi.html \
|
doc/html/pcreapi.html \
|
||||||
doc/html/pcrebuild.html \
|
doc/html/pcrebuild.html \
|
||||||
@ -65,10 +71,6 @@ dist_html_DATA = \
|
|||||||
doc/html/pcretest.html \
|
doc/html/pcretest.html \
|
||||||
doc/html/pcreunicode.html
|
doc/html/pcreunicode.html
|
||||||
|
|
||||||
# doc/html/pcre32.html \
|
|
||||||
# doc/html/pcre_utf32_to_host_byte_order.html \
|
|
||||||
#
|
|
||||||
|
|
||||||
pcrecpp_html = doc/html/pcrecpp.html
|
pcrecpp_html = doc/html/pcrecpp.html
|
||||||
dist_noinst_DATA = $(pcrecpp_html)
|
dist_noinst_DATA = $(pcrecpp_html)
|
||||||
|
|
||||||
@ -140,14 +142,16 @@ pcre.h.generic: pcre.h.in configure.ac
|
|||||||
cp -p pcre.h $@
|
cp -p pcre.h $@
|
||||||
|
|
||||||
# It is more complicated for config.h.generic. We need the version that results
|
# It is more complicated for config.h.generic. We need the version that results
|
||||||
# from a default configuration. We can get this by doing a configure in a
|
# from a default configuration so as to get all the default values for PCRE
|
||||||
# temporary directory. However, some trickery is needed,
|
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
|
||||||
# because the source directory may already be configured. If you
|
# doing a configure in a temporary directory. However, some trickery is needed,
|
||||||
# just try running configure in a new directory, it complains. For this reason,
|
# because the source directory may already be configured. If you just try
|
||||||
# we move config.status out of the way while doing the default configuration.
|
# running configure in a new directory, it complains. For this reason, we move
|
||||||
# The resulting config.h is munged by perl to put #ifdefs round any #defines
|
# config.status out of the way while doing the default configuration. The
|
||||||
# and to get rid of any gcc-specific visibility settings. Make sure that
|
# resulting config.h is munged by perl to put #ifdefs round any #defines for
|
||||||
# PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
|
||||||
|
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
|
||||||
|
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
||||||
config.h.generic: configure.ac
|
config.h.generic: configure.ac
|
||||||
rm -rf $@ _generic
|
rm -rf $@ _generic
|
||||||
mkdir _generic
|
mkdir _generic
|
||||||
@ -160,8 +164,10 @@ config.h.generic: configure.ac
|
|||||||
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
|
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||||
-e 'if(/to make a symbol visible/){next;}' \
|
-e 'if(/to make a symbol visible/){next;}' \
|
||||||
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||||
-e 'if(/^#define\s(?!PACKAGE)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;}' \
|
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
|
||||||
-e 'else {if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}}' \
|
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
|
||||||
|
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
|
||||||
|
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
|
||||||
_generic/config.h >$@
|
_generic/config.h >$@
|
||||||
rm -rf _generic
|
rm -rf _generic
|
||||||
|
|
||||||
@ -344,15 +350,19 @@ EXTRA_DIST += \
|
|||||||
sljit/sljitExecAllocator.c \
|
sljit/sljitExecAllocator.c \
|
||||||
sljit/sljitLir.c \
|
sljit/sljitLir.c \
|
||||||
sljit/sljitLir.h \
|
sljit/sljitLir.h \
|
||||||
sljit/sljitNativeARM_Thumb2.c \
|
sljit/sljitNativeARM_32.c \
|
||||||
sljit/sljitNativeARM_v5.c \
|
sljit/sljitNativeARM_64.c \
|
||||||
|
sljit/sljitNativeARM_T2_32.c \
|
||||||
sljit/sljitNativeMIPS_32.c \
|
sljit/sljitNativeMIPS_32.c \
|
||||||
|
sljit/sljitNativeMIPS_64.c \
|
||||||
sljit/sljitNativeMIPS_common.c \
|
sljit/sljitNativeMIPS_common.c \
|
||||||
sljit/sljitNativePPC_32.c \
|
sljit/sljitNativePPC_32.c \
|
||||||
sljit/sljitNativePPC_64.c \
|
sljit/sljitNativePPC_64.c \
|
||||||
sljit/sljitNativePPC_common.c \
|
sljit/sljitNativePPC_common.c \
|
||||||
sljit/sljitNativeSPARC_32.c \
|
sljit/sljitNativeSPARC_32.c \
|
||||||
sljit/sljitNativeSPARC_common.c \
|
sljit/sljitNativeSPARC_common.c \
|
||||||
|
sljit/sljitNativeTILEGX_64.c \
|
||||||
|
sljit/sljitNativeTILEGX-encoder.c \
|
||||||
sljit/sljitNativeX86_32.c \
|
sljit/sljitNativeX86_32.c \
|
||||||
sljit/sljitNativeX86_64.c \
|
sljit/sljitNativeX86_64.c \
|
||||||
sljit/sljitNativeX86_common.c \
|
sljit/sljitNativeX86_common.c \
|
||||||
@ -572,6 +582,8 @@ EXTRA_DIST += \
|
|||||||
testdata/testoutput1 \
|
testdata/testoutput1 \
|
||||||
testdata/testoutput2 \
|
testdata/testoutput2 \
|
||||||
testdata/testoutput3 \
|
testdata/testoutput3 \
|
||||||
|
testdata/testoutput3A \
|
||||||
|
testdata/testoutput3B \
|
||||||
testdata/testoutput4 \
|
testdata/testoutput4 \
|
||||||
testdata/testoutput5 \
|
testdata/testoutput5 \
|
||||||
testdata/testoutput6 \
|
testdata/testoutput6 \
|
||||||
@ -610,8 +622,10 @@ CLEANFILES += \
|
|||||||
teststderr \
|
teststderr \
|
||||||
testtemp* \
|
testtemp* \
|
||||||
testtry \
|
testtry \
|
||||||
testNinput
|
testNinput \
|
||||||
|
testtrygrep \
|
||||||
|
teststderrgrep \
|
||||||
|
testNinputgrep
|
||||||
|
|
||||||
# PCRE demonstration program. No longer built automatically. The point is that
|
# PCRE demonstration program. No longer built automatically. The point is that
|
||||||
# the users should build it themselves. So just distribute the source.
|
# the users should build it themselves. So just distribute the source.
|
||||||
@ -659,11 +673,13 @@ if WITH_PCRE_CPP
|
|||||||
pkgconfig_DATA += libpcrecpp.pc
|
pkgconfig_DATA += libpcrecpp.pc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Note that pcrecpp.3 is not in this list, but is included separately below.
|
||||||
|
|
||||||
dist_man_MANS = \
|
dist_man_MANS = \
|
||||||
|
doc/pcre-config.1 \
|
||||||
doc/pcre.3 \
|
doc/pcre.3 \
|
||||||
doc/pcre16.3 \
|
doc/pcre16.3 \
|
||||||
doc/pcre32.3 \
|
doc/pcre32.3 \
|
||||||
doc/pcre-config.1 \
|
|
||||||
doc/pcre_assign_jit_stack.3 \
|
doc/pcre_assign_jit_stack.3 \
|
||||||
doc/pcre_compile.3 \
|
doc/pcre_compile.3 \
|
||||||
doc/pcre_compile2.3 \
|
doc/pcre_compile2.3 \
|
||||||
@ -695,6 +711,7 @@ dist_man_MANS = \
|
|||||||
doc/pcrebuild.3 \
|
doc/pcrebuild.3 \
|
||||||
doc/pcrecallout.3 \
|
doc/pcrecallout.3 \
|
||||||
doc/pcrecompat.3 \
|
doc/pcrecompat.3 \
|
||||||
|
doc/pcredemo.3 \
|
||||||
doc/pcregrep.1 \
|
doc/pcregrep.1 \
|
||||||
doc/pcrejit.3 \
|
doc/pcrejit.3 \
|
||||||
doc/pcrelimits.3 \
|
doc/pcrelimits.3 \
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,88 @@
|
|||||||
News about PCRE releases
|
News about PCRE releases
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
Release 8.35 04-April-2014
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
There have been performance improvements for classes containing non-ASCII
|
||||||
|
characters and the "auto-possessification" feature has been extended. Other
|
||||||
|
minor improvements have been implemented and bugs fixed. There is a new callout
|
||||||
|
feature to enable applications to do detailed stack checks at compile time, to
|
||||||
|
avoid running out of stack for deeply nested parentheses. The JIT compiler has
|
||||||
|
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.34 15-December-2013
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
As well as fixing the inevitable bugs, performance has been improved by
|
||||||
|
refactoring and extending the amount of "auto-possessification" that PCRE does.
|
||||||
|
Other notable changes:
|
||||||
|
|
||||||
|
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||||
|
an empty string. If it can, pcretest shows this in its information output.
|
||||||
|
|
||||||
|
. A back reference to a named subpattern when there is more than one of the
|
||||||
|
same name now checks them in the order in which they appear in the pattern.
|
||||||
|
The first one that is set is used for the reference. Previously only the
|
||||||
|
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||||
|
|
||||||
|
. Unicode character properties were updated from Unicode 6.3.0.
|
||||||
|
|
||||||
|
. The character VT has been added to the set of characters that match \s and
|
||||||
|
are generally treated as white space, following this same change in Perl
|
||||||
|
5.18. There is now no difference between "Perl space" and "POSIX space".
|
||||||
|
|
||||||
|
. Perl has changed its handling of \8 and \9. If there is no previously
|
||||||
|
encountered capturing group of those numbers, they are treated as the
|
||||||
|
literal characters 8 and 9 instead of a binary zero followed by the
|
||||||
|
literals. PCRE now does the same.
|
||||||
|
|
||||||
|
. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||||
|
possible to specify values greater than 0777 and also making them
|
||||||
|
unambiguous.
|
||||||
|
|
||||||
|
. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||||
|
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||||
|
were matched by \h.
|
||||||
|
|
||||||
|
. Add JIT support for the 64 bit TileGX architecture.
|
||||||
|
|
||||||
|
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||||
|
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||||
|
does in Unicode mode.
|
||||||
|
|
||||||
|
. Perl no longer allows group names to start with digits, so I have made this
|
||||||
|
change also in PCRE.
|
||||||
|
|
||||||
|
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||||
|
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.33 28-May-2013
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
A number of bugs are fixed, and some performance improvements have been made.
|
||||||
|
There are also some new features, of which these are the most important:
|
||||||
|
|
||||||
|
. The behaviour of the backtracking verbs has been rationalized and
|
||||||
|
documented in more detail.
|
||||||
|
|
||||||
|
. JIT now supports callouts and all of the backtracking verbs.
|
||||||
|
|
||||||
|
. Unicode validation has been updated in the light of Unicode Corrigendum #9,
|
||||||
|
which points out that "non characters" are not "characters that may not
|
||||||
|
appear in Unicode strings" but rather "characters that are reserved for
|
||||||
|
internal use and have only local meaning".
|
||||||
|
|
||||||
|
. (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
|
||||||
|
creator of a pattern can specify lower (but not higher) limits for the
|
||||||
|
matching process.
|
||||||
|
|
||||||
|
. The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
|
||||||
|
the (*UTF) feature, as this could be a security issue.
|
||||||
|
|
||||||
|
|
||||||
Release 8.32 30-November-2012
|
Release 8.32 30-November-2012
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
@ -591,7 +673,7 @@ some of the new functionality in Perl 5.005.
|
|||||||
Another (I hope this is the last!) change has been made to the API for the
|
Another (I hope this is the last!) change has been made to the API for the
|
||||||
pcre_compile() function. An additional argument has been added to make it
|
pcre_compile() function. An additional argument has been added to make it
|
||||||
possible to pass over a pointer to character tables built in the current
|
possible to pass over a pointer to character tables built in the current
|
||||||
locale by pcre_maketables(). To use the default tables, this new arguement
|
locale by pcre_maketables(). To use the default tables, this new argument
|
||||||
should be passed as NULL.
|
should be passed as NULL.
|
||||||
|
|
||||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||||
|
@ -9,11 +9,14 @@ This document contains the following sections:
|
|||||||
Building for virtual Pascal
|
Building for virtual Pascal
|
||||||
Stack size in Windows environments
|
Stack size in Windows environments
|
||||||
Linking programs in Windows environments
|
Linking programs in Windows environments
|
||||||
|
Calling conventions in Windows environments
|
||||||
Comments about Win32 builds
|
Comments about Win32 builds
|
||||||
Building PCRE on Windows with CMake
|
Building PCRE on Windows with CMake
|
||||||
Use of relative paths with CMake on Windows
|
Use of relative paths with CMake on Windows
|
||||||
Testing with RunTest.bat
|
Testing with RunTest.bat
|
||||||
|
Building under Windows CE with Visual Studio 200x
|
||||||
Building under Windows with BCC5.5
|
Building under Windows with BCC5.5
|
||||||
|
Building using Borland C++ Builder 2007 (CB2007) and higher
|
||||||
Building PCRE on OpenVMS
|
Building PCRE on OpenVMS
|
||||||
Building PCRE on Stratus OpenVOS
|
Building PCRE on Stratus OpenVOS
|
||||||
Building PCRE on native z/OS and z/VM
|
Building PCRE on native z/OS and z/VM
|
||||||
@ -168,8 +171,8 @@ can skip ahead to the CMake section.
|
|||||||
pcre16_version.c
|
pcre16_version.c
|
||||||
pcre16_xclass.c
|
pcre16_xclass.c
|
||||||
|
|
||||||
(7') If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
pcre32_byte_order.c
|
pcre32_byte_order.c
|
||||||
pcre32_chartables.c
|
pcre32_chartables.c
|
||||||
@ -194,30 +197,31 @@ can skip ahead to the CMake section.
|
|||||||
pcre32_version.c
|
pcre32_version.c
|
||||||
pcre32_xclass.c
|
pcre32_xclass.c
|
||||||
|
|
||||||
(8) If you want to build the POSIX wrapper functions (which apply only to the
|
(9) If you want to build the POSIX wrapper functions (which apply only to the
|
||||||
8-bit library), ensure that you have the pcreposix.h file and then compile
|
8-bit library), ensure that you have the pcreposix.h file and then compile
|
||||||
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
||||||
(on its own) as the pcreposix library.
|
(on its own) as the pcreposix library.
|
||||||
|
|
||||||
(9) The pcretest program can be linked with any combination of the 8-bit, 16-bit
|
(10) The pcretest program can be linked with any combination of the 8-bit,
|
||||||
and 32-bit libraries (depending on what you selected in config.h). Compile
|
16-bit and 32-bit libraries (depending on what you selected in config.h).
|
||||||
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
|
Compile pcretest.c and pcre_printint.c (again, don't forget
|
||||||
link them together with the appropriate library/ies. If you compiled an
|
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
|
||||||
8-bit library, pcretest also needs the pcreposix wrapper library unless
|
If you compiled an 8-bit library, pcretest also needs the pcreposix
|
||||||
you compiled it with -DNOPOSIX.
|
wrapper library unless you compiled it with -DNOPOSIX.
|
||||||
|
|
||||||
(10) Run pcretest on the testinput files in the testdata directory, and check
|
(11) Run pcretest on the testinput files in the testdata directory, and check
|
||||||
that the output matches the corresponding testoutput files. There are
|
that the output matches the corresponding testoutput files. There are
|
||||||
comments about what each test does in the section entitled "Testing PCRE"
|
comments about what each test does in the section entitled "Testing PCRE"
|
||||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||||
32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
|
32-bit libraries, you need to run pcretest with the -16 option to do
|
||||||
tests and with the -32 option to do 32-bit tests.
|
16-bit tests and with the -32 option to do 32-bit tests.
|
||||||
|
|
||||||
Some tests are relevant only when certain build-time options are selected.
|
Some tests are relevant only when certain build-time options are selected.
|
||||||
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
|
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
|
||||||
you have built PCRE without it. See the comments at the start of each
|
if you have built PCRE without it. See the comments at the start of each
|
||||||
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
||||||
will run the appropriate tests for you.
|
will run the appropriate tests for you. The command "RunTest list" will
|
||||||
|
output a list of all the tests.
|
||||||
|
|
||||||
Note that the supplied files are in Unix format, with just LF characters
|
Note that the supplied files are in Unix format, with just LF characters
|
||||||
as line terminators. You may need to edit them to change this if your
|
as line terminators. You may need to edit them to change this if your
|
||||||
@ -227,11 +231,11 @@ can skip ahead to the CMake section.
|
|||||||
locale to "french" rather than "fr_FR", and there are some minor output
|
locale to "french" rather than "fr_FR", and there are some minor output
|
||||||
differences.
|
differences.
|
||||||
|
|
||||||
(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||||
by the testdata files. However, you might also like to build and run
|
by the testdata files. However, you might also like to build and run
|
||||||
the JIT test program, pcre_jit_test.c.
|
the freestanding JIT test program, pcre_jit_test.c.
|
||||||
|
|
||||||
(12) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||||
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
||||||
library).
|
library).
|
||||||
|
|
||||||
@ -428,12 +432,9 @@ CMake build process. In the CMake GUI, the cache can be deleted by selecting
|
|||||||
|
|
||||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||||
|
|
||||||
A PCRE user comments as follows:
|
A PCRE user comments as follows: I thought that others may want to know the
|
||||||
|
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
|
||||||
|
|
||||||
I thought that others may want to know the current state of
|
|
||||||
CMAKE_USE_RELATIVE_PATHS support on Windows.
|
|
||||||
|
|
||||||
Here it is:
|
|
||||||
-- AdditionalIncludeDirectories is only partially modified (only the
|
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||||
first path - see below)
|
first path - see below)
|
||||||
-- Only some of the contained file paths are modified - shown below for
|
-- Only some of the contained file paths are modified - shown below for
|
||||||
@ -449,9 +450,9 @@ deal.
|
|||||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||||
|
|
||||||
RelativePath="pcre.h">
|
RelativePath="pcre.h"
|
||||||
RelativePath="pcre_chartables.c">
|
RelativePath="pcre_chartables.c"
|
||||||
RelativePath="pcre_chartables.c.rule">
|
RelativePath="pcre_chartables.c.rule"
|
||||||
|
|
||||||
|
|
||||||
TESTING WITH RUNTEST.BAT
|
TESTING WITH RUNTEST.BAT
|
||||||
@ -489,20 +490,6 @@ To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
|||||||
pcre_scanner_unittest.exe.
|
pcre_scanner_unittest.exe.
|
||||||
|
|
||||||
|
|
||||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
|
||||||
|
|
||||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
|
||||||
|
|
||||||
Some of the core BCC libraries have a version of PCRE from 1998 built in,
|
|
||||||
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
|
|
||||||
version mismatch. I'm including an easy workaround below, if you'd like to
|
|
||||||
include it in the non-unix instructions:
|
|
||||||
|
|
||||||
When linking a project with BCC5.5, pcre.lib must be included before any of
|
|
||||||
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
|
||||||
line.
|
|
||||||
|
|
||||||
|
|
||||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||||
|
|
||||||
Vincent Richomme sent a zip archive of files to help with this process. They
|
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||||
@ -510,11 +497,149 @@ can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
|||||||
site.
|
site.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||||
|
|
||||||
|
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||||
|
|
||||||
|
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
|
||||||
|
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
|
||||||
|
mismatch. I'm including an easy workaround below, if you'd like to include it
|
||||||
|
in the non-unix instructions:
|
||||||
|
|
||||||
|
When linking a project with BCC5.5, pcre.lib must be included before any of the
|
||||||
|
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
|
||||||
|
|
||||||
|
A PCRE user sent these comments about this environment (see also the comment
|
||||||
|
from another user that follows them):
|
||||||
|
|
||||||
|
The XE versions of C++ Builder come with a RegularExpressionsCore class which
|
||||||
|
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
|
||||||
|
be desirable.
|
||||||
|
|
||||||
|
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
|
||||||
|
that is not usable with any version of C++ Builder because the compiler ships
|
||||||
|
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
|
||||||
|
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
|
||||||
|
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
|
||||||
|
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
|
||||||
|
embedded version of PCRE does not have the 16 bit function names, there is no
|
||||||
|
conflict.
|
||||||
|
|
||||||
|
Building PCRE using a C++ Builder static library project file (recommended):
|
||||||
|
|
||||||
|
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
|
||||||
|
original include path.
|
||||||
|
|
||||||
|
2. Download PCRE from pcre.org and extract to a directory.
|
||||||
|
|
||||||
|
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
|
||||||
|
pcre.h, and config.h.generic to config.h.
|
||||||
|
|
||||||
|
4. Edit pcre.h and pcre_config.c so that they include config.h.
|
||||||
|
|
||||||
|
5. Edit config.h like so:
|
||||||
|
|
||||||
|
Comment out the following lines:
|
||||||
|
#define PACKAGE "pcre"
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
#define PACKAGE_STRING "PCRE 8.32"
|
||||||
|
#define PACKAGE_TARNAME "pcre"
|
||||||
|
#define PACKAGE_URL ""
|
||||||
|
#define PACKAGE_VERSION "8.32"
|
||||||
|
|
||||||
|
Add the following lines:
|
||||||
|
#ifndef SUPPORT_UTF
|
||||||
|
#define SUPPORT_UTF 100 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UCP
|
||||||
|
#define SUPPORT_UCP 101 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_PCRE16
|
||||||
|
#define SUPPORT_PCRE16 102 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UTF8
|
||||||
|
#define SUPPORT_UTF8 103 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
|
||||||
|
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
|
||||||
|
paths by going to Project / Options. Set the Include path. Do this from the
|
||||||
|
"Base" option to apply to both Release and Debug builds. Now add the following
|
||||||
|
files to the project:
|
||||||
|
|
||||||
|
pcre.h
|
||||||
|
pcre16_byte_order.c
|
||||||
|
pcre16_chartables.c
|
||||||
|
pcre16_compile.c
|
||||||
|
pcre16_config.c
|
||||||
|
pcre16_dfa_exec.c
|
||||||
|
pcre16_exec.c
|
||||||
|
pcre16_fullinfo.c
|
||||||
|
pcre16_get.c
|
||||||
|
pcre16_globals.c
|
||||||
|
pcre16_maketables.c
|
||||||
|
pcre16_newline.c
|
||||||
|
pcre16_ord2utf16.c
|
||||||
|
pcre16_printint.c
|
||||||
|
pcre16_refcount.c
|
||||||
|
pcre16_string_utils.c
|
||||||
|
pcre16_study.c
|
||||||
|
pcre16_tables.c
|
||||||
|
pcre16_ucd.c
|
||||||
|
pcre16_utf16_utils.c
|
||||||
|
pcre16_valid_utf16.c
|
||||||
|
pcre16_version.c
|
||||||
|
pcre16_xclass.c
|
||||||
|
|
||||||
|
//Optional
|
||||||
|
pcre_version.c
|
||||||
|
|
||||||
|
7. After compiling the .lib file, copy the .lib and header files to a project
|
||||||
|
you want to use PCRE with. Enjoy.
|
||||||
|
|
||||||
|
Optional ... Building PCRE using the makevp.bat file:
|
||||||
|
|
||||||
|
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
|
||||||
|
versions.
|
||||||
|
|
||||||
|
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
|
||||||
|
|
||||||
|
Another PCRE user added this comment:
|
||||||
|
|
||||||
|
Another approach I successfully used for some years with BCB 5 and 6 was to
|
||||||
|
make sure that include and library paths of PCRE are configured before the
|
||||||
|
default paths of the IDE in the dialogs where one can manage those paths.
|
||||||
|
Afterwards one can open the project files using a text editor and manually add
|
||||||
|
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
|
||||||
|
the library nodes where the IDE manages its own libraries to link against in
|
||||||
|
front of the IDE-own libraries. This way one can use the default PCRE function
|
||||||
|
names without getting access violations on runtime.
|
||||||
|
|
||||||
|
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
|
||||||
|
|
||||||
|
|
||||||
BUILDING PCRE ON OPENVMS
|
BUILDING PCRE ON OPENVMS
|
||||||
|
|
||||||
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
|
Stephen Hoffman sent the following, in December 2012:
|
||||||
relate to an older version of PCRE that used fewer source files, so the exact
|
|
||||||
commands will need changing. See the current list of source files above.
|
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
|
||||||
|
OpenVMS port and here
|
||||||
|
|
||||||
|
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
|
||||||
|
|
||||||
|
is a zip with the OpenVMS files, and with one modified testing-related PCRE
|
||||||
|
file." This is a port of PCRE 8.32.
|
||||||
|
|
||||||
|
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
|
||||||
|
They relate to an older version of PCRE that used fewer source files, so the
|
||||||
|
exact commands will need changing. See the current list of source files above.
|
||||||
|
|
||||||
"It was quite easy to compile and link the library. I don't have a formal
|
"It was quite easy to compile and link the library. I don't have a formal
|
||||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||||
@ -636,4 +761,4 @@ There is also a mirror here:
|
|||||||
http://www.vsoft-software.com/downloads.html
|
http://www.vsoft-software.com/downloads.html
|
||||||
|
|
||||||
==========================
|
==========================
|
||||||
Last Updated: 21 November 2012
|
Last Updated: 14 May 2013
|
||||||
|
@ -25,6 +25,12 @@
|
|||||||
# when the HTML documentation is built. It works like this so that
|
# when the HTML documentation is built. It works like this so that
|
||||||
# doc/html can be deleted and re-created from scratch.
|
# doc/html can be deleted and re-created from scratch.
|
||||||
|
|
||||||
|
# README & NON-AUTOTOOLS-BUILD
|
||||||
|
# These files are copied into the doc/html directory, with .txt
|
||||||
|
# extensions so that they can be hyperlinked from the HTML
|
||||||
|
# documentation, because some people just go to the HTML without
|
||||||
|
# looking for text files.
|
||||||
|
|
||||||
|
|
||||||
# First, sort out the documentation. Remove pcredemo.3 first because it won't
|
# First, sort out the documentation. Remove pcredemo.3 first because it won't
|
||||||
# pass the markup check (it is created below, using markup that none of the
|
# pass the markup check (it is created below, using markup that none of the
|
||||||
@ -122,6 +128,8 @@ if [ $? != 0 ] ; then exit 1; fi
|
|||||||
echo "Making HTML documentation"
|
echo "Making HTML documentation"
|
||||||
/bin/rm html/*
|
/bin/rm html/*
|
||||||
cp index.html.src html/index.html
|
cp index.html.src html/index.html
|
||||||
|
cp ../README html/README.txt
|
||||||
|
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
|
||||||
|
|
||||||
for file in *.1 ; do
|
for file in *.1 ; do
|
||||||
base=`basename $file .1`
|
base=`basename $file .1`
|
||||||
@ -218,7 +226,6 @@ files="\
|
|||||||
pcre_string_utils.c \
|
pcre_string_utils.c \
|
||||||
pcre_study.c \
|
pcre_study.c \
|
||||||
pcre_tables.c \
|
pcre_tables.c \
|
||||||
pcre_ucp_searchfuncs.c \
|
|
||||||
pcre_valid_utf8.c \
|
pcre_valid_utf8.c \
|
||||||
pcre_version.c \
|
pcre_version.c \
|
||||||
pcre_xclass.c \
|
pcre_xclass.c \
|
||||||
@ -238,8 +245,6 @@ files="\
|
|||||||
pcre_stringpiece_unittest.cc \
|
pcre_stringpiece_unittest.cc \
|
||||||
perltest.pl \
|
perltest.pl \
|
||||||
ucp.h \
|
ucp.h \
|
||||||
ucpinternal.h \
|
|
||||||
ucptable.h \
|
|
||||||
makevp.bat \
|
makevp.bat \
|
||||||
pcre.def \
|
pcre.def \
|
||||||
libpcre.def \
|
libpcre.def \
|
||||||
|
@ -9,8 +9,10 @@ from:
|
|||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||||
|
|
||||||
There is a mailing list for discussion about the development of PCRE at
|
There is a mailing list for discussion about the development of PCRE at
|
||||||
|
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||||
|
subscription here:
|
||||||
|
|
||||||
pcre-dev@exim.org
|
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||||
|
|
||||||
Please read the NEWS file if you are upgrading from a previous release.
|
Please read the NEWS file if you are upgrading from a previous release.
|
||||||
The contents of this README file are:
|
The contents of this README file are:
|
||||||
@ -25,6 +27,8 @@ The contents of this README file are:
|
|||||||
Shared libraries
|
Shared libraries
|
||||||
Cross-compiling using autotools
|
Cross-compiling using autotools
|
||||||
Using HP's ANSI C++ compiler (aCC)
|
Using HP's ANSI C++ compiler (aCC)
|
||||||
|
Compiling in Tru64 using native compilers
|
||||||
|
Using Sun's compilers for Solaris
|
||||||
Using PCRE from MySQL
|
Using PCRE from MySQL
|
||||||
Making new tarballs
|
Making new tarballs
|
||||||
Testing PCRE
|
Testing PCRE
|
||||||
@ -35,10 +39,10 @@ The contents of this README file are:
|
|||||||
The PCRE APIs
|
The PCRE APIs
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
PCRE is written in C, and it has its own API. There are three sets of functions,
|
PCRE is written in C, and it has its own API. There are three sets of
|
||||||
one for the 8-bit library, which processes strings of bytes, one for the
|
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||||
16-bit library, which processes strings of 16-bit values, and one for the 32-bit
|
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||||
library, which processes strings of 32-bit values. The distribution also
|
32-bit library, which processes strings of 32-bit values. The distribution also
|
||||||
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
||||||
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
||||||
C++.
|
C++.
|
||||||
@ -81,11 +85,12 @@ documentation is supplied in two other forms:
|
|||||||
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||||
doc/pcretest.txt in the source distribution. The first of these is a
|
doc/pcretest.txt in the source distribution. The first of these is a
|
||||||
concatenation of the text forms of all the section 3 man pages except
|
concatenation of the text forms of all the section 3 man pages except
|
||||||
those that summarize individual functions. The other two are the text
|
the listing of pcredemo.c and those that summarize individual functions.
|
||||||
forms of the section 1 man pages for the pcregrep and pcretest commands.
|
The other two are the text forms of the section 1 man pages for the
|
||||||
These text forms are provided for ease of scanning with text editors or
|
pcregrep and pcretest commands. These text forms are provided for ease of
|
||||||
similar tools. They are installed in <prefix>/share/doc/pcre, where
|
scanning with text editors or similar tools. They are installed in
|
||||||
<prefix> is the installation prefix (defaulting to /usr/local).
|
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
|
||||||
|
(defaulting to /usr/local).
|
||||||
|
|
||||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||||
in various ways, and rooted in a file called index.html, is distributed in
|
in various ways, and rooted in a file called index.html, is distributed in
|
||||||
@ -110,6 +115,11 @@ contributions provided support for compiling PCRE on various flavours of
|
|||||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||||
in the standard distribution, so these contributions have been archived.
|
in the standard distribution, so these contributions have been archived.
|
||||||
|
|
||||||
|
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||||
|
pcretest programs here:
|
||||||
|
|
||||||
|
http://www.rexegg.com/pcregrep-pcretest.html
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on non-Unix-like systems
|
Building PCRE on non-Unix-like systems
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
@ -260,9 +270,17 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
|
|
||||||
on the "configure" command.
|
on the "configure" command.
|
||||||
|
|
||||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||||
If the limit is exceeded during a match, the match fails. The default is ten
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
million. You can change the default by setting, for example,
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
|
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||||
|
when matching a pattern. If the limit is exceeded during a match, the match
|
||||||
|
fails. The default is ten million. You can change the default by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
--with-match-limit=500000
|
--with-match-limit=500000
|
||||||
|
|
||||||
@ -342,7 +360,8 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
report is generated by running "make coverage". If ccache is installed on
|
report is generated by running "make coverage". If ccache is installed on
|
||||||
your system, it must be disabled when building PCRE for coverage reporting.
|
your system, it must be disabled when building PCRE for coverage reporting.
|
||||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||||
running "make" to build PCRE.
|
running "make" to build PCRE. There is more information about coverage
|
||||||
|
reporting in the "pcrebuild" documentation.
|
||||||
|
|
||||||
. The pcregrep program currently supports only 8-bit data files, and so
|
. The pcregrep program currently supports only 8-bit data files, and so
|
||||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||||
@ -354,12 +373,12 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
|
|
||||||
Of course, the relevant libraries must be installed on your system.
|
Of course, the relevant libraries must be installed on your system.
|
||||||
|
|
||||||
. The default size of internal buffer used by pcregrep can be set by, for
|
. The default size (in bytes) of the internal buffer used by pcregrep can be
|
||||||
example:
|
set by, for example:
|
||||||
|
|
||||||
--with-pcregrep-bufsize=50K
|
--with-pcregrep-bufsize=51200
|
||||||
|
|
||||||
The default value is 20K.
|
The value must be a plain integer. The default is 20480.
|
||||||
|
|
||||||
. It is possible to compile pcretest so that it links with the libreadline
|
. It is possible to compile pcretest so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
@ -575,6 +594,27 @@ running the "configure" script:
|
|||||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||||
|
|
||||||
|
|
||||||
|
Compiling in Tru64 using native compilers
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
The following error may occur when compiling with native compilers in the Tru64
|
||||||
|
operating system:
|
||||||
|
|
||||||
|
CXX libpcrecpp_la-pcrecpp.lo
|
||||||
|
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
|
||||||
|
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
|
||||||
|
override default - see section 7.1.2 of the C++ Using Guide"
|
||||||
|
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
|
||||||
|
- see section 7.1.2 of the C++ Using Guide"
|
||||||
|
|
||||||
|
This may be followed by other errors, complaining that 'namespace "std" has no
|
||||||
|
member'. The solution to this is to add the line
|
||||||
|
|
||||||
|
#define __USE_STD_IOSTREAM 1
|
||||||
|
|
||||||
|
to the config.h file.
|
||||||
|
|
||||||
|
|
||||||
Using Sun's compilers for Solaris
|
Using Sun's compilers for Solaris
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
@ -624,27 +664,40 @@ NON-AUTOTOOLS-BUILD.
|
|||||||
The RunTest script runs the pcretest test program (which is documented in its
|
The RunTest script runs the pcretest test program (which is documented in its
|
||||||
own man page) on each of the relevant testinput files in the testdata
|
own man page) on each of the relevant testinput files in the testdata
|
||||||
directory, and compares the output with the contents of the corresponding
|
directory, and compares the output with the contents of the corresponding
|
||||||
testoutput files. Some tests are relevant only when certain build-time options
|
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||||
were selected. For example, the tests for UTF-8/16/32 support are run only if
|
from pcretest. Other files whose names begin with "test" are used as working
|
||||||
--enable-utf was used. RunTest outputs a comment when it skips a test.
|
files in some tests.
|
||||||
|
|
||||||
|
Some tests are relevant only when certain build-time options were selected. For
|
||||||
|
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
|
||||||
|
used. RunTest outputs a comment when it skips a test.
|
||||||
|
|
||||||
Many of the tests that are not skipped are run up to three times. The second
|
Many of the tests that are not skipped are run up to three times. The second
|
||||||
run forces pcre_study() to be called for all patterns except for a few in some
|
run forces pcre_study() to be called for all patterns except for a few in some
|
||||||
tests that are marked "never study" (see the pcretest program for how this is
|
tests that are marked "never study" (see the pcretest program for how this is
|
||||||
done). If JIT support is available, the non-DFA tests are run a third time,
|
done). If JIT support is available, the non-DFA tests are run a third time,
|
||||||
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
||||||
|
This testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||||
|
|
||||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||||
libraries that are enabled. If you want to run just one set of tests, call
|
libraries that are enabled. If you want to run just one set of tests, call
|
||||||
RunTest with either the -8, -16 or -32 option.
|
RunTest with either the -8, -16 or -32 option.
|
||||||
|
|
||||||
RunTest uses a file called testtry to hold the main output from pcretest.
|
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||||
Other files whose names begin with "test" are used as working files in some
|
on the RunTest command line. To run pcretest on just one or more specific test
|
||||||
tests. To run pcretest on just one or more specific test files, give their
|
files, give their numbers as arguments to RunTest, for example:
|
||||||
numbers as arguments to RunTest, for example:
|
|
||||||
|
|
||||||
RunTest 2 7 11
|
RunTest 2 7 11
|
||||||
|
|
||||||
|
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||||
|
end), or a number preceded by ~ to exclude a test. For example:
|
||||||
|
|
||||||
|
RunTest 3-15 ~10
|
||||||
|
|
||||||
|
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||||
|
except test 13. Whatever order the arguments are in, the tests are always run
|
||||||
|
in numerical order.
|
||||||
|
|
||||||
You can also call RunTest with the single argument "list" to cause it to output
|
You can also call RunTest with the single argument "list" to cause it to output
|
||||||
a list of tests.
|
a list of tests.
|
||||||
|
|
||||||
@ -704,21 +757,24 @@ test is run only when JIT support is not available. They test some JIT-specific
|
|||||||
features such as information output from pcretest about JIT compilation.
|
features such as information output from pcretest about JIT compilation.
|
||||||
|
|
||||||
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
||||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
|
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
|
||||||
These are tests that generate different output in the two modes. They are for
|
mode. These are tests that generate different output in the two modes. They are
|
||||||
general cases, UTF-8/16/32 support, and Unicode property support, respectively.
|
for general cases, UTF-8/16/32 support, and Unicode property support,
|
||||||
|
respectively.
|
||||||
|
|
||||||
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
||||||
16/32-bit features of the DFA matching engine.
|
16/32-bit features of the DFA matching engine.
|
||||||
|
|
||||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
|
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
|
||||||
link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
|
the link size is set to 2 for the 16-bit library. They test reloading
|
||||||
|
pre-compiled patterns.
|
||||||
|
|
||||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
|
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
|
||||||
general cases, and UTF-16 support, respectively.
|
for general cases, and UTF-16 support, respectively.
|
||||||
|
|
||||||
|
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
|
||||||
|
for general cases, and UTF-32 support, respectively.
|
||||||
|
|
||||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
|
|
||||||
general cases, and UTF-32 support, respectively.
|
|
||||||
|
|
||||||
Character tables
|
Character tables
|
||||||
----------------
|
----------------
|
||||||
@ -932,4 +988,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 27 October 2012
|
Last updated: 17 January 2014
|
||||||
|
@ -69,427 +69,447 @@ utf8=$?
|
|||||||
|
|
||||||
echo "Testing pcregrep main features"
|
echo "Testing pcregrep main features"
|
||||||
|
|
||||||
echo "---------------------------- Test 1 ------------------------------" >testtry
|
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 10 -----------------------------" >>testtry
|
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
|
||||||
echo seventeen >testtemp1
|
echo seventeen >testtemp1grep
|
||||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1 ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 34 -----------------------------" >>testtry
|
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtry 2>teststderr
|
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
echo "======== STDERR ========" >>testtry
|
echo "======== STDERR ========" >>testtrygrep
|
||||||
cat teststderr >>testtry
|
cat teststderrgrep >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
|
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
elephant" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
elephant" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
||||||
elephant" ./testdata/grepinput) >>testtry
|
elephant" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 54 ------------------------------" >>testtry
|
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 55 -----------------------------" >>testtry
|
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 56 -----------------------------" >>testtry
|
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 57 -----------------------------" >>testtry
|
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 58 -----------------------------" >>testtry
|
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 59 -----------------------------" >>testtry
|
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 60 -----------------------------" >>testtry
|
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 61 -----------------------------" >>testtry
|
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 62 -----------------------------" >>testtry
|
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 63 -----------------------------" >>testtry
|
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 64 ------------------------------" >>testtry
|
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 65 ------------------------------" >>testtry
|
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 66 ------------------------------" >>testtry
|
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 67 ------------------------------" >>testtry
|
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 68 ------------------------------" >>testtry
|
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 69 -----------------------------" >>testtry
|
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 70 -----------------------------" >>testtry
|
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 71 -----------------------------" >>testtry
|
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 72 -----------------------------" >>testtry
|
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 73 -----------------------------" >>testtry
|
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 74 -----------------------------" >>testtry
|
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 75 -----------------------------" >>testtry
|
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 76 -----------------------------" >>testtry
|
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 77 -----------------------------" >>testtry
|
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 78 -----------------------------" >>testtry
|
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 79 -----------------------------" >>testtry
|
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 80 -----------------------------" >>testtry
|
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 81 -----------------------------" >>testtry
|
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 82 -----------------------------" >>testtry
|
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 83 -----------------------------" >>testtry
|
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 84 -----------------------------" >>testtry
|
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
||||||
echo testdata/grepinput3 >testtemp1
|
echo testdata/grepinput3 >testtemp1grep
|
||||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1 "fox|complete|t7") >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 85 -----------------------------" >>testtry
|
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 86 -----------------------------" >>testtry
|
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 87 -----------------------------" >>testtry
|
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 88 -----------------------------" >>testtry
|
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 89 -----------------------------" >>testtry
|
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 90 -----------------------------" >>testtry
|
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 91 -----------------------------" >>testtry
|
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 92 -----------------------------" >>testtry
|
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 93 -----------------------------" >>testtry
|
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 94 -----------------------------" >>testtry
|
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 95 -----------------------------" >>testtry
|
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 96 -----------------------------" >>testtry
|
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 97 -----------------------------" >>testtry
|
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
||||||
echo "grepinput$" >testtemp1
|
echo "grepinput$" >testtemp1grep
|
||||||
echo "grepinput8" >>testtemp1
|
echo "grepinput8" >>testtemp1grep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 98 -----------------------------" >>testtry
|
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
||||||
echo "grepinput$" >testtemp1
|
echo "grepinput$" >testtemp1grep
|
||||||
echo "grepinput8" >>testtemp1
|
echo "grepinput8" >>testtemp1grep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 99 -----------------------------" >>testtry
|
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
||||||
echo "grepinput$" >testtemp1
|
echo "grepinput$" >testtemp1grep
|
||||||
echo "grepinput8" >testtemp2
|
echo "grepinput8" >testtemp2grep
|
||||||
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1 --exclude-from=$builddir/testtemp2 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 100 ------------------------------" >>testtry
|
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 101 ------------------------------" >>testtry
|
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput testtry
|
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
|
|
||||||
@ -498,15 +518,15 @@ if [ $? != 0 ] ; then exit 1; fi
|
|||||||
if [ $utf8 -ne 0 ] ; then
|
if [ $utf8 -ne 0 ] ; then
|
||||||
echo "Testing pcregrep UTF-8 features"
|
echo "Testing pcregrep UTF-8 features"
|
||||||
|
|
||||||
echo "---------------------------- Test U1 ------------------------------" >testtry
|
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test U2 ------------------------------" >>testtry
|
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
|
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput8 testtry
|
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
else
|
else
|
||||||
@ -522,28 +542,28 @@ fi
|
|||||||
# starts with a hyphen. These tests are run in the build directory.
|
# starts with a hyphen. These tests are run in the build directory.
|
||||||
|
|
||||||
echo "Testing pcregrep newline settings"
|
echo "Testing pcregrep newline settings"
|
||||||
printf "abc\rdef\r\nghi\njkl" >testNinput
|
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtry
|
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
|
||||||
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinput >>testtry
|
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
|
||||||
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
|
||||||
pattern=`printf 'def\rjkl'`
|
pattern=`printf 'def\rjkl'`
|
||||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
|
||||||
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinput >>testtry
|
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
|
||||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtry
|
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
|
||||||
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutputN testtry
|
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
# Run the PCRE tests using the pcretest program. The appropriate tests are
|
# Run the PCRE tests using the pcretest program. The appropriate tests are
|
||||||
# selected, depending on which build-time options were used.
|
# selected, depending on which build-time options were used.
|
||||||
#
|
#
|
||||||
@ -13,21 +14,38 @@
|
|||||||
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
||||||
# disabled with /SS.
|
# disabled with /SS.
|
||||||
#
|
#
|
||||||
# When JIT support is available, all the tests are also run with -s+ to test
|
# When JIT support is available, all appropriate tests are also run with -s+ to
|
||||||
# (again, almost) everything with studying and the JIT option. There are also
|
# test (again, almost) everything with studying and the JIT option, unless
|
||||||
# two tests for JIT-specific features, one to be run when JIT support is
|
# "nojit" is given on the command line. There are also two tests for
|
||||||
# available, and one when it is not.
|
# JIT-specific features, one to be run when JIT support is available (unless
|
||||||
|
# "nojit" is specified), and one when it is not.
|
||||||
#
|
#
|
||||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||||
# possible to select which to test by the arguments -8, -16 or -32.
|
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||||
|
# command line.
|
||||||
#
|
#
|
||||||
# Other arguments for this script can be individual test numbers, or the word
|
# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
|
||||||
# "valgrind", "valgrind-log" or "sim" followed by an argument to run cross-
|
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||||
# compiled executables under a simulator, for example:
|
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
|
||||||
|
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||||
|
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||||
|
# in numerical order.
|
||||||
|
#
|
||||||
|
# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
|
||||||
|
# locale test, and failure usually means there's an issue with the locale
|
||||||
|
# rather than a bug in PCRE, so normally subsequent tests are run. "3S" is
|
||||||
|
# useful when you want to debug or update the test.
|
||||||
|
#
|
||||||
|
# Inappropriate tests are automatically skipped (with a comment to say so): for
|
||||||
|
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
|
||||||
|
# support is compiled, test 13 is skipped.
|
||||||
|
#
|
||||||
|
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
||||||
|
# followed by an argument to run cross-compiled executables under a simulator,
|
||||||
|
# for example:
|
||||||
#
|
#
|
||||||
# RunTest 3 sim "qemu-arm -s 8388608"
|
# RunTest 3 sim "qemu-arm -s 8388608"
|
||||||
#
|
#
|
||||||
#
|
|
||||||
# There are two special cases where only one argument is allowed:
|
# There are two special cases where only one argument is allowed:
|
||||||
#
|
#
|
||||||
# If the first and only argument is "ebcdic", the script runs the special
|
# If the first and only argument is "ebcdic", the script runs the special
|
||||||
@ -36,7 +54,7 @@
|
|||||||
#
|
#
|
||||||
# If the script is obeyed as "RunTest list", a list of available tests is
|
# If the script is obeyed as "RunTest list", a list of available tests is
|
||||||
# output, but none of them are run.
|
# output, but none of them are run.
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
# Define test titles in variables so that they can be output as a list. Some
|
# Define test titles in variables so that they can be output as a list. Some
|
||||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||||
@ -53,8 +71,8 @@ title8="Test 8: DFA matching main functionality"
|
|||||||
title9="Test 9: DFA matching with UTF"
|
title9="Test 9: DFA matching with UTF"
|
||||||
title10="Test 10: DFA matching with Unicode properties"
|
title10="Test 10: DFA matching with Unicode properties"
|
||||||
title11="Test 11: Internal offsets and code size tests"
|
title11="Test 11: Internal offsets and code size tests"
|
||||||
title12="Test 12: JIT-specific features (JIT available)"
|
title12="Test 12: JIT-specific features (when JIT is available)"
|
||||||
title13="Test 13: JIT-specific features (JIT not available)"
|
title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||||
title14="Test 14: Specials for the basic 8-bit library"
|
title14="Test 14: Specials for the basic 8-bit library"
|
||||||
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||||
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
||||||
@ -69,6 +87,8 @@ title24="Test 24: Specials for the 16-bit library with UTF-16 support"
|
|||||||
title25="Test 25: Specials for the 32-bit library"
|
title25="Test 25: Specials for the 32-bit library"
|
||||||
title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
||||||
|
|
||||||
|
maxtest=26
|
||||||
|
|
||||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
echo $title1
|
echo $title1
|
||||||
echo $title2 "(not UTF)"
|
echo $title2 "(not UTF)"
|
||||||
@ -151,17 +171,19 @@ fi
|
|||||||
|
|
||||||
# Default values
|
# Default values
|
||||||
|
|
||||||
valgrind=
|
|
||||||
sim=
|
|
||||||
arg8=
|
arg8=
|
||||||
arg16=
|
arg16=
|
||||||
arg32=
|
arg32=
|
||||||
|
nojit=
|
||||||
|
sim=
|
||||||
|
skip=
|
||||||
|
valgrind=
|
||||||
|
|
||||||
# This is in case the caller has set aliases (as I do - PH)
|
# This is in case the caller has set aliases (as I do - PH)
|
||||||
unset cp ls mv rm
|
unset cp ls mv rm
|
||||||
|
|
||||||
# Select which tests to run; for those that are explicitly requested, check
|
# Process options and select which tests to run; for those that are explicitly
|
||||||
# that the necessary optional facilities are available.
|
# requested, check that the necessary optional facilities are available.
|
||||||
|
|
||||||
do1=no
|
do1=no
|
||||||
do2=no
|
do2=no
|
||||||
@ -221,10 +243,34 @@ while [ $# -gt 0 ] ; do
|
|||||||
-8) arg8=yes;;
|
-8) arg8=yes;;
|
||||||
-16) arg16=yes;;
|
-16) arg16=yes;;
|
||||||
-32) arg32=yes;;
|
-32) arg32=yes;;
|
||||||
|
nojit) nojit=yes;;
|
||||||
|
sim) shift; sim=$1;;
|
||||||
valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
||||||
valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
|
valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
|
||||||
sim) shift; sim=$1;;
|
~*)
|
||||||
*) echo "Unknown test number '$1'"; exit 1;;
|
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
|
||||||
|
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
|
||||||
|
else
|
||||||
|
echo "Unknown option or test selector '$1'"; exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*-*)
|
||||||
|
if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
|
||||||
|
tf=`expr "$1" : '\([0-9]*\)'`
|
||||||
|
tt=`expr "$1" : '.*-\([0-9]*\)'`
|
||||||
|
if [ "$tt" = "" ] ; then tt=$maxtest; fi
|
||||||
|
if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) >/dev/null; then
|
||||||
|
echo "Invalid test range '$1'"; exit 1
|
||||||
|
fi
|
||||||
|
while expr "$tf" "<=" "$tt" >/dev/null; do
|
||||||
|
eval do${tf}=yes
|
||||||
|
tf=`expr $tf + 1`
|
||||||
|
done
|
||||||
|
else
|
||||||
|
echo "Invalid test range '$1'"; exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*) echo "Unknown option or test selector '$1'"; exit 1;;
|
||||||
esac
|
esac
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
@ -309,79 +355,12 @@ ucp=$?
|
|||||||
jitopt=
|
jitopt=
|
||||||
$sim ./pcretest -C jit >/dev/null
|
$sim ./pcretest -C jit >/dev/null
|
||||||
jit=$?
|
jit=$?
|
||||||
if [ $jit -ne 0 ] ; then
|
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||||
jitopt=-s+
|
jitopt=-s+
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $utf -eq 0 ] ; then
|
|
||||||
if [ $do4 = yes ] ; then
|
|
||||||
echo "Can't run test 4 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do5 = yes ] ; then
|
|
||||||
echo "Can't run test 5 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do9 = yes ] ; then
|
|
||||||
echo "Can't run test 8 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do15 = yes ] ; then
|
|
||||||
echo "Can't run test 15 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do18 = yes ] ; then
|
|
||||||
echo "Can't run test 18 because UTF support is not configured"
|
|
||||||
fi
|
|
||||||
if [ $do22 = yes ] ; then
|
|
||||||
echo "Can't run test 22 because UTF support is not configured"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $ucp -eq 0 ] ; then
|
|
||||||
if [ $do6 = yes ] ; then
|
|
||||||
echo "Can't run test 6 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do7 = yes ] ; then
|
|
||||||
echo "Can't run test 7 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do10 = yes ] ; then
|
|
||||||
echo "Can't run test 10 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do16 = yes ] ; then
|
|
||||||
echo "Can't run test 16 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do19 = yes ] ; then
|
|
||||||
echo "Can't run test 19 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $link_size -ne 2 ] ; then
|
|
||||||
if [ $do11 = yes ] ; then
|
|
||||||
echo "Can't run test 11 because the link size ($link_size) is not 2"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $jit -eq 0 ] ; then
|
|
||||||
if [ $do12 = "yes" ] ; then
|
|
||||||
echo "Can't run test 12 because JIT support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
if [ $do13 = "yes" ] ; then
|
|
||||||
echo "Can't run test 13 because JIT support is configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If no specific tests were requested, select all. Those that are not
|
# If no specific tests were requested, select all. Those that are not
|
||||||
# relevant will be skipped.
|
# relevant will be automatically skipped.
|
||||||
|
|
||||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||||
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||||
@ -418,6 +397,11 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
|||||||
do26=yes
|
do26=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||||
|
# only of explicit skips.
|
||||||
|
|
||||||
|
for i in $skip; do eval do$i=no; done
|
||||||
|
|
||||||
# Show which release and which test data
|
# Show which release and which test data
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
@ -479,8 +463,9 @@ fi
|
|||||||
|
|
||||||
# Locale-specific tests, provided that either the "fr_FR" or the "french"
|
# Locale-specific tests, provided that either the "fr_FR" or the "french"
|
||||||
# locale is available. The former is the Unix-like standard; the latter is
|
# locale is available. The former is the Unix-like standard; the latter is
|
||||||
# for Windows. Another possibility is "fr", which needs to be run against
|
# for Windows. Another possibility is "fr". Unfortunately, different versions
|
||||||
# the Windows-specific input and output files.
|
# of the French locale give different outputs for some items. This test passes
|
||||||
|
# if the output matches any one of the alternative output files.
|
||||||
|
|
||||||
if [ $do3 = yes ] ; then
|
if [ $do3 = yes ] ; then
|
||||||
locale -a | grep '^fr_FR$' >/dev/null
|
locale -a | grep '^fr_FR$' >/dev/null
|
||||||
@ -488,20 +473,28 @@ if [ $do3 = yes ] ; then
|
|||||||
locale=fr_FR
|
locale=fr_FR
|
||||||
infile=$testdata/testinput3
|
infile=$testdata/testinput3
|
||||||
outfile=$testdata/testoutput3
|
outfile=$testdata/testoutput3
|
||||||
|
outfile2=$testdata/testoutput3A
|
||||||
|
outfile3=$testdata/testoutput3B
|
||||||
else
|
else
|
||||||
infile=test3input
|
infile=test3input
|
||||||
outfile=test3output
|
outfile=test3output
|
||||||
|
outfile2=test3outputA
|
||||||
|
outfile3=test3outputB
|
||||||
locale -a | grep '^french$' >/dev/null
|
locale -a | grep '^french$' >/dev/null
|
||||||
if [ $? -eq 0 ] ; then
|
if [ $? -eq 0 ] ; then
|
||||||
locale=french
|
locale=french
|
||||||
sed 's/fr_FR/french/' $testdata/testinput3 >test3input
|
sed 's/fr_FR/french/' $testdata/testinput3 >test3input
|
||||||
sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
|
sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
|
||||||
|
sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
|
||||||
|
sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
|
||||||
else
|
else
|
||||||
locale -a | grep '^fr$' >/dev/null
|
locale -a | grep '^fr$' >/dev/null
|
||||||
if [ $? -eq 0 ] ; then
|
if [ $? -eq 0 ] ; then
|
||||||
locale=fr
|
locale=fr
|
||||||
sed 's/fr_FR/fr/' $testdata/wintestinput3 >test3input
|
sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input
|
||||||
sed 's/fr_FR/fr/' $testdata/wintestoutput3 >test3output
|
sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output
|
||||||
|
sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA
|
||||||
|
sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB
|
||||||
else
|
else
|
||||||
locale=
|
locale=
|
||||||
fi
|
fi
|
||||||
@ -513,18 +506,20 @@ if [ $do3 = yes ] ; then
|
|||||||
for opt in "" "-s" $jitopt; do
|
for opt in "" "-s" $jitopt; do
|
||||||
$sim $valgrind ./pcretest -q $bmode $opt $infile testtry
|
$sim $valgrind ./pcretest -q $bmode $opt $infile testtry
|
||||||
if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
$cf $outfile testtry
|
if $cf $outfile testtry >teststdout || \
|
||||||
if [ $? != 0 ] ; then
|
$cf $outfile2 testtry >teststdout || \
|
||||||
echo " "
|
$cf $outfile3 testtry >teststdout
|
||||||
echo "Locale test did not run entirely successfully."
|
then
|
||||||
echo "This usually means that there is a problem with the locale"
|
|
||||||
echo "settings rather than a bug in PCRE."
|
|
||||||
break;
|
|
||||||
else
|
|
||||||
if [ "$opt" = "-s" ] ; then echo " OK with study"
|
if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||||
elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||||
else echo " OK"
|
else echo " OK"
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
echo "** Locale test did not run successfully. The output did not match"
|
||||||
|
echo " $outfile, $outfile2 or $outfile3."
|
||||||
|
echo " This may mean that there is a problem with the locale settings rather"
|
||||||
|
echo " than a bug in PCRE."
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
else exit 1
|
else exit 1
|
||||||
fi
|
fi
|
||||||
@ -700,7 +695,7 @@ fi
|
|||||||
|
|
||||||
if [ $do12 = yes ] ; then
|
if [ $do12 = yes ] ; then
|
||||||
echo $title12
|
echo $title12
|
||||||
if [ $jit -eq 0 ] ; then
|
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||||
echo " Skipped because JIT is not available or not usable"
|
echo " Skipped because JIT is not available or not usable"
|
||||||
else
|
else
|
||||||
$sim $valgrind ./pcretest -q $bmode $testdata/testinput12 testtry
|
$sim $valgrind ./pcretest -q $bmode $testdata/testinput12 testtry
|
||||||
@ -1010,6 +1005,6 @@ fi
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Clean up local working files
|
# Clean up local working files
|
||||||
rm -f test3input test3output testNinput testsaved* teststderr teststdout testtry
|
rm -f test3input test3output test3outputA testNinput testsaved* teststderr teststdout testtry
|
||||||
|
|
||||||
# End
|
# End
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
@rem tests 4 5 9 15 and 18 require utf support
|
@rem tests 4 5 9 15 and 18 require utf support
|
||||||
@rem tests 6 7 10 16 and 19 require ucp support
|
@rem tests 6 7 10 16 and 19 require ucp support
|
||||||
@rem 11 requires ucp and link size 2
|
@rem 11 requires ucp and link size 2
|
||||||
@rem 12 requires presense of jit support
|
@rem 12 requires presence of jit support
|
||||||
@rem 13 requires absence of jit support
|
@rem 13 requires absence of jit support
|
||||||
@rem Sheri P also added override tests for study and jit testing
|
@rem Sheri P also added override tests for study and jit testing
|
||||||
@rem Zoltan Herczeg added libpcre16 support
|
@rem Zoltan Herczeg added libpcre16 support
|
||||||
|
629
tools/pcre/aclocal.m4
vendored
629
tools/pcre/aclocal.m4
vendored
File diff suppressed because it is too large
Load Diff
270
tools/pcre/ar-lib
Normal file
270
tools/pcre/ar-lib
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
# Wrapper for Microsoft lib.exe
|
||||||
|
|
||||||
|
me=ar-lib
|
||||||
|
scriptversion=2012-03-01.08; # UTC
|
||||||
|
|
||||||
|
# Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||||
|
# Written by Peter Rosin <peda@lysator.liu.se>.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# As a special exception to the GNU General Public License, if you
|
||||||
|
# distribute this file as part of a program that contains a
|
||||||
|
# configuration script generated by Autoconf, you may include it under
|
||||||
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
# This file is maintained in Automake, please report
|
||||||
|
# bugs to <bug-automake@gnu.org> or send patches to
|
||||||
|
# <automake-patches@gnu.org>.
|
||||||
|
|
||||||
|
|
||||||
|
# func_error message
|
||||||
|
func_error ()
|
||||||
|
{
|
||||||
|
echo "$me: $1" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
file_conv=
|
||||||
|
|
||||||
|
# func_file_conv build_file
|
||||||
|
# Convert a $build file to $host form and store it in $file
|
||||||
|
# Currently only supports Windows hosts.
|
||||||
|
func_file_conv ()
|
||||||
|
{
|
||||||
|
file=$1
|
||||||
|
case $file in
|
||||||
|
/ | /[!/]*) # absolute file, and not a UNC file
|
||||||
|
if test -z "$file_conv"; then
|
||||||
|
# lazily determine how to convert abs files
|
||||||
|
case `uname -s` in
|
||||||
|
MINGW*)
|
||||||
|
file_conv=mingw
|
||||||
|
;;
|
||||||
|
CYGWIN*)
|
||||||
|
file_conv=cygwin
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
file_conv=wine
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
case $file_conv in
|
||||||
|
mingw)
|
||||||
|
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
|
||||||
|
;;
|
||||||
|
cygwin)
|
||||||
|
file=`cygpath -m "$file" || echo "$file"`
|
||||||
|
;;
|
||||||
|
wine)
|
||||||
|
file=`winepath -w "$file" || echo "$file"`
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
# func_at_file at_file operation archive
|
||||||
|
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
|
||||||
|
# for each of them.
|
||||||
|
# When interpreting the content of the @FILE, do NOT use func_file_conv,
|
||||||
|
# since the user would need to supply preconverted file names to
|
||||||
|
# binutils ar, at least for MinGW.
|
||||||
|
func_at_file ()
|
||||||
|
{
|
||||||
|
operation=$2
|
||||||
|
archive=$3
|
||||||
|
at_file_contents=`cat "$1"`
|
||||||
|
eval set x "$at_file_contents"
|
||||||
|
shift
|
||||||
|
|
||||||
|
for member
|
||||||
|
do
|
||||||
|
$AR -NOLOGO $operation:"$member" "$archive" || exit $?
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
'')
|
||||||
|
func_error "no command. Try '$0 --help' for more information."
|
||||||
|
;;
|
||||||
|
-h | --h*)
|
||||||
|
cat <<EOF
|
||||||
|
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
|
||||||
|
|
||||||
|
Members may be specified in a file named with @FILE.
|
||||||
|
EOF
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
-v | --v*)
|
||||||
|
echo "$me, version $scriptversion"
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test $# -lt 3; then
|
||||||
|
func_error "you must specify a program, an action and an archive"
|
||||||
|
fi
|
||||||
|
|
||||||
|
AR=$1
|
||||||
|
shift
|
||||||
|
while :
|
||||||
|
do
|
||||||
|
if test $# -lt 2; then
|
||||||
|
func_error "you must specify a program, an action and an archive"
|
||||||
|
fi
|
||||||
|
case $1 in
|
||||||
|
-lib | -LIB \
|
||||||
|
| -ltcg | -LTCG \
|
||||||
|
| -machine* | -MACHINE* \
|
||||||
|
| -subsystem* | -SUBSYSTEM* \
|
||||||
|
| -verbose | -VERBOSE \
|
||||||
|
| -wx* | -WX* )
|
||||||
|
AR="$AR $1"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
action=$1
|
||||||
|
shift
|
||||||
|
break
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
orig_archive=$1
|
||||||
|
shift
|
||||||
|
func_file_conv "$orig_archive"
|
||||||
|
archive=$file
|
||||||
|
|
||||||
|
# strip leading dash in $action
|
||||||
|
action=${action#-}
|
||||||
|
|
||||||
|
delete=
|
||||||
|
extract=
|
||||||
|
list=
|
||||||
|
quick=
|
||||||
|
replace=
|
||||||
|
index=
|
||||||
|
create=
|
||||||
|
|
||||||
|
while test -n "$action"
|
||||||
|
do
|
||||||
|
case $action in
|
||||||
|
d*) delete=yes ;;
|
||||||
|
x*) extract=yes ;;
|
||||||
|
t*) list=yes ;;
|
||||||
|
q*) quick=yes ;;
|
||||||
|
r*) replace=yes ;;
|
||||||
|
s*) index=yes ;;
|
||||||
|
S*) ;; # the index is always updated implicitly
|
||||||
|
c*) create=yes ;;
|
||||||
|
u*) ;; # TODO: don't ignore the update modifier
|
||||||
|
v*) ;; # TODO: don't ignore the verbose modifier
|
||||||
|
*)
|
||||||
|
func_error "unknown action specified"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
action=${action#?}
|
||||||
|
done
|
||||||
|
|
||||||
|
case $delete$extract$list$quick$replace,$index in
|
||||||
|
yes,* | ,yes)
|
||||||
|
;;
|
||||||
|
yesyes*)
|
||||||
|
func_error "more than one action specified"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
func_error "no action specified"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test -n "$delete"; then
|
||||||
|
if test ! -f "$orig_archive"; then
|
||||||
|
func_error "archive not found"
|
||||||
|
fi
|
||||||
|
for member
|
||||||
|
do
|
||||||
|
case $1 in
|
||||||
|
@*)
|
||||||
|
func_at_file "${1#@}" -REMOVE "$archive"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
func_file_conv "$1"
|
||||||
|
$AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
elif test -n "$extract"; then
|
||||||
|
if test ! -f "$orig_archive"; then
|
||||||
|
func_error "archive not found"
|
||||||
|
fi
|
||||||
|
if test $# -gt 0; then
|
||||||
|
for member
|
||||||
|
do
|
||||||
|
case $1 in
|
||||||
|
@*)
|
||||||
|
func_at_file "${1#@}" -EXTRACT "$archive"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
func_file_conv "$1"
|
||||||
|
$AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
else
|
||||||
|
$AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
|
||||||
|
do
|
||||||
|
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
elif test -n "$quick$replace"; then
|
||||||
|
if test ! -f "$orig_archive"; then
|
||||||
|
if test -z "$create"; then
|
||||||
|
echo "$me: creating $orig_archive"
|
||||||
|
fi
|
||||||
|
orig_archive=
|
||||||
|
else
|
||||||
|
orig_archive=$archive
|
||||||
|
fi
|
||||||
|
|
||||||
|
for member
|
||||||
|
do
|
||||||
|
case $1 in
|
||||||
|
@*)
|
||||||
|
func_file_conv "${1#@}"
|
||||||
|
set x "$@" "@$file"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
func_file_conv "$1"
|
||||||
|
set x "$@" "$file"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
if test -n "$orig_archive"; then
|
||||||
|
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
|
||||||
|
else
|
||||||
|
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
elif test -n "$list"; then
|
||||||
|
if test ! -f "$orig_archive"; then
|
||||||
|
func_error "archive not found"
|
||||||
|
fi
|
||||||
|
$AR -NOLOGO -LIST "$archive" || exit $?
|
||||||
|
fi
|
@ -1,10 +1,9 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Wrapper for compilers which do not understand '-c -o'.
|
# Wrapper for compilers which do not understand '-c -o'.
|
||||||
|
|
||||||
scriptversion=2012-03-05.13; # UTC
|
scriptversion=2012-10-14.11; # UTC
|
||||||
|
|
||||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free
|
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||||
# Software Foundation, Inc.
|
|
||||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
@ -113,6 +112,11 @@ func_cl_dashl ()
|
|||||||
lib=$dir/$lib.lib
|
lib=$dir/$lib.lib
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
|
if test -f "$dir/lib$lib.a"; then
|
||||||
|
found=yes
|
||||||
|
lib=$dir/lib$lib.a
|
||||||
|
break
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
IFS=$save_IFS
|
IFS=$save_IFS
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||||
#cmakedefine HAVE_UNISTD_H 1
|
#cmakedefine HAVE_UNISTD_H 1
|
||||||
#cmakedefine HAVE_WINDOWS_H 1
|
#cmakedefine HAVE_WINDOWS_H 1
|
||||||
|
#cmakedefine HAVE_STDINT_H 1
|
||||||
|
#cmakedefine HAVE_INTTYPES_H 1
|
||||||
|
|
||||||
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
||||||
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
||||||
@ -44,6 +46,7 @@
|
|||||||
#define NEWLINE @NEWLINE@
|
#define NEWLINE @NEWLINE@
|
||||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||||
|
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
|
||||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||||
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
||||||
|
84
tools/pcre/config.guess
vendored
84
tools/pcre/config.guess
vendored
@ -1,14 +1,12 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Attempt to guess a canonical system name.
|
# Attempt to guess a canonical system name.
|
||||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
# Copyright 1992-2013 Free Software Foundation, Inc.
|
||||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
|
||||||
# 2011, 2012 Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
timestamp='2012-08-14'
|
timestamp='2013-11-29'
|
||||||
|
|
||||||
# This file is free software; you can redistribute it and/or modify it
|
# This file is free software; you can redistribute it and/or modify it
|
||||||
# under the terms of the GNU General Public License as published by
|
# under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful, but
|
# This program is distributed in the hope that it will be useful, but
|
||||||
@ -22,19 +20,17 @@ timestamp='2012-08-14'
|
|||||||
# As a special exception to the GNU General Public License, if you
|
# As a special exception to the GNU General Public License, if you
|
||||||
# distribute this file as part of a program that contains a
|
# distribute this file as part of a program that contains a
|
||||||
# configuration script generated by Autoconf, you may include it under
|
# configuration script generated by Autoconf, you may include it under
|
||||||
# the same distribution terms that you use for the rest of that program.
|
# the same distribution terms that you use for the rest of that
|
||||||
|
# program. This Exception is an additional permission under section 7
|
||||||
|
# of the GNU General Public License, version 3 ("GPLv3").
|
||||||
# Originally written by Per Bothner. Please send patches (context
|
|
||||||
# diff format) to <config-patches@gnu.org> and include a ChangeLog
|
|
||||||
# entry.
|
|
||||||
#
|
#
|
||||||
# This script attempts to guess a canonical system name similar to
|
# Originally written by Per Bothner.
|
||||||
# config.sub. If it succeeds, it prints the system name on stdout, and
|
|
||||||
# exits with 0. Otherwise, it exits with 1.
|
|
||||||
#
|
#
|
||||||
# You can get the latest version of this script from:
|
# You can get the latest version of this script from:
|
||||||
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
|
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
|
||||||
|
#
|
||||||
|
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
|
||||||
|
|
||||||
|
|
||||||
me=`echo "$0" | sed -e 's,.*/,,'`
|
me=`echo "$0" | sed -e 's,.*/,,'`
|
||||||
|
|
||||||
@ -54,9 +50,7 @@ version="\
|
|||||||
GNU config.guess ($timestamp)
|
GNU config.guess ($timestamp)
|
||||||
|
|
||||||
Originally written by Per Bothner.
|
Originally written by Per Bothner.
|
||||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
Copyright 1992-2013 Free Software Foundation, Inc.
|
||||||
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
|
||||||
Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
This is free software; see the source for copying conditions. There is NO
|
This is free software; see the source for copying conditions. There is NO
|
||||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||||
@ -139,23 +133,21 @@ UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
|
|||||||
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
|
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
|
||||||
|
|
||||||
case "${UNAME_SYSTEM}" in
|
case "${UNAME_SYSTEM}" in
|
||||||
Linux|GNU/*)
|
Linux|GNU|GNU/*)
|
||||||
|
# If the system lacks a compiler, then just pick glibc.
|
||||||
|
# We could probably try harder.
|
||||||
|
LIBC=gnu
|
||||||
|
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
cat <<-EOF > $dummy.c
|
cat <<-EOF > $dummy.c
|
||||||
#include <features.h>
|
#include <features.h>
|
||||||
#ifdef __UCLIBC__
|
#if defined(__UCLIBC__)
|
||||||
# ifdef __UCLIBC_CONFIG_VERSION__
|
|
||||||
LIBC=uclibc __UCLIBC_CONFIG_VERSION__
|
|
||||||
# else
|
|
||||||
LIBC=uclibc
|
LIBC=uclibc
|
||||||
# endif
|
#elif defined(__dietlibc__)
|
||||||
#else
|
|
||||||
# ifdef __dietlibc__
|
|
||||||
LIBC=dietlibc
|
LIBC=dietlibc
|
||||||
#else
|
#else
|
||||||
LIBC=gnu
|
LIBC=gnu
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
EOF
|
EOF
|
||||||
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
|
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
|
||||||
;;
|
;;
|
||||||
@ -329,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||||||
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
|
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
|
||||||
echo arm-acorn-riscix${UNAME_RELEASE}
|
echo arm-acorn-riscix${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
arm:riscos:*:*|arm:RISCOS:*:*)
|
arm*:riscos:*:*|arm*:RISCOS:*:*)
|
||||||
echo arm-unknown-riscos
|
echo arm-unknown-riscos
|
||||||
exit ;;
|
exit ;;
|
||||||
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
|
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
|
||||||
@ -912,6 +904,9 @@ EOF
|
|||||||
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
|
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
|
arc:Linux:*:* | arceb:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
arm*:Linux:*:*)
|
arm*:Linux:*:*)
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||||
@ -974,6 +969,9 @@ EOF
|
|||||||
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
|
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
|
||||||
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
||||||
;;
|
;;
|
||||||
|
or1k:Linux:*:*)
|
||||||
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
or32:Linux:*:*)
|
or32:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
@ -997,8 +995,14 @@ EOF
|
|||||||
ppc:Linux:*:*)
|
ppc:Linux:*:*)
|
||||||
echo powerpc-unknown-linux-${LIBC}
|
echo powerpc-unknown-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
|
ppc64le:Linux:*:*)
|
||||||
|
echo powerpc64le-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
|
ppcle:Linux:*:*)
|
||||||
|
echo powerpcle-unknown-linux-${LIBC}
|
||||||
|
exit ;;
|
||||||
s390:Linux:*:* | s390x:Linux:*:*)
|
s390:Linux:*:* | s390x:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-ibm-linux
|
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
|
||||||
exit ;;
|
exit ;;
|
||||||
sh64*:Linux:*:*)
|
sh64*:Linux:*:*)
|
||||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||||
@ -1252,19 +1256,31 @@ EOF
|
|||||||
exit ;;
|
exit ;;
|
||||||
*:Darwin:*:*)
|
*:Darwin:*:*)
|
||||||
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
||||||
case $UNAME_PROCESSOR in
|
|
||||||
i386)
|
|
||||||
eval $set_cc_for_build
|
eval $set_cc_for_build
|
||||||
|
if test "$UNAME_PROCESSOR" = unknown ; then
|
||||||
|
UNAME_PROCESSOR=powerpc
|
||||||
|
fi
|
||||||
|
if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
|
||||||
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
|
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
|
||||||
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||||
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
|
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||||
grep IS_64BIT_ARCH >/dev/null
|
grep IS_64BIT_ARCH >/dev/null
|
||||||
then
|
then
|
||||||
UNAME_PROCESSOR="x86_64"
|
case $UNAME_PROCESSOR in
|
||||||
fi
|
i386) UNAME_PROCESSOR=x86_64 ;;
|
||||||
fi ;;
|
powerpc) UNAME_PROCESSOR=powerpc64 ;;
|
||||||
unknown) UNAME_PROCESSOR=powerpc ;;
|
|
||||||
esac
|
esac
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
elif test "$UNAME_PROCESSOR" = i386 ; then
|
||||||
|
# Avoid executing cc on OS X 10.9, as it ships with a stub
|
||||||
|
# that puts up a graphical alert prompting to install
|
||||||
|
# developer tools. Any system running Mac OS X 10.7 or
|
||||||
|
# later (Darwin 11 and later) is required to have a 64-bit
|
||||||
|
# processor. This is not true of the ARM version of Darwin
|
||||||
|
# that Apple uses in portable devices.
|
||||||
|
UNAME_PROCESSOR=x86_64
|
||||||
|
fi
|
||||||
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
|
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
|
||||||
exit ;;
|
exit ;;
|
||||||
*:procnto*:*:* | *:QNX:[0123456789]*:*)
|
*:procnto*:*:* | *:QNX:[0123456789]*:*)
|
||||||
|
@ -5,20 +5,28 @@
|
|||||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
systems.
|
systems.
|
||||||
|
|
||||||
In environments that support the facilities, config.h.in is converted by
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||||
should copy the distributed config.h.generic to config.h, and then edit the
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
macro definitions to be the way you need them. You must then add
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
at the start of every source.
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
|
||||||
them both to 0; an emulation function will be used. */
|
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||||
|
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */
|
||||||
|
|
||||||
/* By default, the \R escape sequence matches any Unicode line ending
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
@ -44,27 +52,19 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef EBCDIC_NL25 */
|
/* #undef EBCDIC_NL25 */
|
||||||
|
|
||||||
/* Define to 1 if you have the `bcopy' function. */
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
#ifndef HAVE_BCOPY
|
/* #undef HAVE_BCOPY */
|
||||||
#define HAVE_BCOPY 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||||
#ifndef HAVE_BZLIB_H
|
/* #undef HAVE_BZLIB_H */
|
||||||
#define HAVE_BZLIB_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <dirent.h> header file. */
|
/* Define to 1 if you have the <dirent.h> header file. */
|
||||||
#ifndef HAVE_DIRENT_H
|
/* #undef HAVE_DIRENT_H */
|
||||||
#define HAVE_DIRENT_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
#ifndef HAVE_DLFCN_H
|
/* #undef HAVE_DLFCN_H */
|
||||||
#define HAVE_DLFCN_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||||
@ -73,29 +73,19 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#ifndef HAVE_INTTYPES_H
|
/* #undef HAVE_INTTYPES_H */
|
||||||
#define HAVE_INTTYPES_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <limits.h> header file. */
|
/* Define to 1 if you have the <limits.h> header file. */
|
||||||
#ifndef HAVE_LIMITS_H
|
/* #undef HAVE_LIMITS_H */
|
||||||
#define HAVE_LIMITS_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the type `long long'. */
|
/* Define to 1 if the system has the type `long long'. */
|
||||||
#ifndef HAVE_LONG_LONG
|
/* #undef HAVE_LONG_LONG */
|
||||||
#define HAVE_LONG_LONG 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `memmove' function. */
|
/* Define to 1 if you have the `memmove' function. */
|
||||||
#ifndef HAVE_MEMMOVE
|
/* #undef HAVE_MEMMOVE */
|
||||||
#define HAVE_MEMMOVE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
#ifndef HAVE_MEMORY_H
|
/* #undef HAVE_MEMORY_H */
|
||||||
#define HAVE_MEMORY_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define if you have POSIX threads libraries and header files. */
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
/* #undef HAVE_PTHREAD */
|
/* #undef HAVE_PTHREAD */
|
||||||
@ -110,34 +100,22 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef HAVE_READLINE_READLINE_H */
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
#ifndef HAVE_STDINT_H
|
/* #undef HAVE_STDINT_H */
|
||||||
#define HAVE_STDINT_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
#ifndef HAVE_STDLIB_H
|
/* #undef HAVE_STDLIB_H */
|
||||||
#define HAVE_STDLIB_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `strerror' function. */
|
/* Define to 1 if you have the `strerror' function. */
|
||||||
#ifndef HAVE_STRERROR
|
/* #undef HAVE_STRERROR */
|
||||||
#define HAVE_STRERROR 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <string> header file. */
|
/* Define to 1 if you have the <string> header file. */
|
||||||
#ifndef HAVE_STRING
|
/* #undef HAVE_STRING */
|
||||||
#define HAVE_STRING 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <strings.h> header file. */
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
#ifndef HAVE_STRINGS_H
|
/* #undef HAVE_STRINGS_H */
|
||||||
#define HAVE_STRINGS_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <string.h> header file. */
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
#ifndef HAVE_STRING_H
|
/* #undef HAVE_STRING_H */
|
||||||
#define HAVE_STRING_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have `strtoimax'. */
|
/* Define to 1 if you have `strtoimax'. */
|
||||||
/* #undef HAVE_STRTOIMAX */
|
/* #undef HAVE_STRTOIMAX */
|
||||||
@ -146,46 +124,31 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef HAVE_STRTOLL */
|
/* #undef HAVE_STRTOLL */
|
||||||
|
|
||||||
/* Define to 1 if you have `strtoq'. */
|
/* Define to 1 if you have `strtoq'. */
|
||||||
#ifndef HAVE_STRTOQ
|
/* #undef HAVE_STRTOQ */
|
||||||
#define HAVE_STRTOQ 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
#ifndef HAVE_SYS_STAT_H
|
/* #undef HAVE_SYS_STAT_H */
|
||||||
#define HAVE_SYS_STAT_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
#ifndef HAVE_SYS_TYPES_H
|
/* #undef HAVE_SYS_TYPES_H */
|
||||||
#define HAVE_SYS_TYPES_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||||
/* #undef HAVE_TYPE_TRAITS_H */
|
/* #undef HAVE_TYPE_TRAITS_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
#ifndef HAVE_UNISTD_H
|
/* #undef HAVE_UNISTD_H */
|
||||||
#define HAVE_UNISTD_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
/* #undef HAVE_UNSIGNED_LONG_LONG */
|
||||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||||
declarations. */
|
/* #undef HAVE_VISIBILITY */
|
||||||
#ifndef HAVE_VISIBILITY
|
|
||||||
#define HAVE_VISIBILITY 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <windows.h> header file. */
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
/* #undef HAVE_WINDOWS_H */
|
/* #undef HAVE_WINDOWS_H */
|
||||||
|
|
||||||
/* Define to 1 if you have the <zlib.h> header file. */
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
#ifndef HAVE_ZLIB_H
|
/* #undef HAVE_ZLIB_H */
|
||||||
#define HAVE_ZLIB_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to 1 if you have `_strtoi64'. */
|
/* Define to 1 if you have `_strtoi64'. */
|
||||||
/* #undef HAVE__STRTOI64 */
|
/* #undef HAVE__STRTOI64 */
|
||||||
@ -201,6 +164,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
|
|
||||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||||
*/
|
*/
|
||||||
|
/* This is ignored unless you are using libtool. */
|
||||||
#ifndef LT_OBJDIR
|
#ifndef LT_OBJDIR
|
||||||
#define LT_OBJDIR ".libs/"
|
#define LT_OBJDIR ".libs/"
|
||||||
#endif
|
#endif
|
||||||
@ -253,9 +217,6 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define NEWLINE 10
|
#define NEWLINE 10
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
|
||||||
/* #undef NO_MINUS_C_MINUS_O */
|
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||||
This can sometimes be a problem on systems that have stacks of limited
|
This can sometimes be a problem on systems that have stacks of limited
|
||||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||||
@ -275,7 +236,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define PACKAGE_NAME "PCRE"
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE 8.32"
|
#define PACKAGE_STRING "PCRE 8.35"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre"
|
#define PACKAGE_TARNAME "pcre"
|
||||||
@ -284,7 +245,14 @@ them both to 0; an emulation function will be used. */
|
|||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "8.32"
|
#define PACKAGE_VERSION "8.35"
|
||||||
|
|
||||||
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern. */
|
||||||
|
#ifndef PARENS_NEST_LIMIT
|
||||||
|
#define PARENS_NEST_LIMIT 250
|
||||||
|
#endif
|
||||||
|
|
||||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||||
pcregrep to hold parts of the file it is searching. This is also the
|
pcregrep to hold parts of the file it is searching. This is also the
|
||||||
@ -325,13 +293,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#ifndef STDC_HEADERS
|
/* #undef STDC_HEADERS */
|
||||||
#define STDC_HEADERS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
|
||||||
are able to generate code coverage reports. */
|
|
||||||
/* #undef SUPPORT_GCOV */
|
|
||||||
|
|
||||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
/* #undef SUPPORT_JIT */
|
/* #undef SUPPORT_JIT */
|
||||||
@ -357,9 +319,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* #undef SUPPORT_PCRE32 */
|
/* #undef SUPPORT_PCRE32 */
|
||||||
|
|
||||||
/* Define to any value to enable the 8 bit PCRE library. */
|
/* Define to any value to enable the 8 bit PCRE library. */
|
||||||
#ifndef SUPPORT_PCRE8
|
/* #undef SUPPORT_PCRE8 */
|
||||||
#define SUPPORT_PCRE8 /**/
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to any value to enable JIT support in pcregrep. */
|
/* Define to any value to enable JIT support in pcregrep. */
|
||||||
/* #undef SUPPORT_PCREGREP_JIT */
|
/* #undef SUPPORT_PCREGREP_JIT */
|
||||||
@ -373,13 +333,11 @@ them both to 0; an emulation function will be used. */
|
|||||||
ASCII/UTF-8/16/32, but not both at once. */
|
ASCII/UTF-8/16/32, but not both at once. */
|
||||||
/* #undef SUPPORT_UTF */
|
/* #undef SUPPORT_UTF */
|
||||||
|
|
||||||
/* Valgrind support to find invalid memory reads. */
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#ifndef VERSION
|
#define VERSION "8.35"
|
||||||
#define VERSION "8.32"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
@ -5,20 +5,28 @@
|
|||||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
systems.
|
systems.
|
||||||
|
|
||||||
In environments that support the facilities, config.h.in is converted by
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
|
||||||
should copy the distributed config.h.generic to config.h, and then edit the
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
macro definitions to be the way you need them. You must then add
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
at the start of every source.
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||||
them both to 0; an emulation function will be used. */
|
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||||
|
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */
|
||||||
|
|
||||||
/* By default, the \R escape sequence matches any Unicode line ending
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
@ -133,8 +141,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||||
#undef HAVE_UNSIGNED_LONG_LONG
|
#undef HAVE_UNSIGNED_LONG_LONG
|
||||||
|
|
||||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||||
declarations. */
|
|
||||||
#undef HAVE_VISIBILITY
|
#undef HAVE_VISIBILITY
|
||||||
|
|
||||||
/* Define to 1 if you have the <windows.h> header file. */
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
@ -195,9 +202,6 @@ them both to 0; an emulation function will be used. */
|
|||||||
or -2 (ANYCRLF). */
|
or -2 (ANYCRLF). */
|
||||||
#undef NEWLINE
|
#undef NEWLINE
|
||||||
|
|
||||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
|
||||||
#undef NO_MINUS_C_MINUS_O
|
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||||
This can sometimes be a problem on systems that have stacks of limited
|
This can sometimes be a problem on systems that have stacks of limited
|
||||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||||
@ -228,6 +232,11 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#undef PACKAGE_VERSION
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern. */
|
||||||
|
#undef PARENS_NEST_LIMIT
|
||||||
|
|
||||||
/* to make a symbol visible */
|
/* to make a symbol visible */
|
||||||
#undef PCRECPP_EXP_DECL
|
#undef PCRECPP_EXP_DECL
|
||||||
|
|
||||||
@ -284,10 +293,6 @@ them both to 0; an emulation function will be used. */
|
|||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#undef STDC_HEADERS
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
|
||||||
are able to generate code coverage reports. */
|
|
||||||
#undef SUPPORT_GCOV
|
|
||||||
|
|
||||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
#undef SUPPORT_JIT
|
#undef SUPPORT_JIT
|
||||||
|
|
||||||
@ -326,7 +331,7 @@ them both to 0; an emulation function will be used. */
|
|||||||
ASCII/UTF-8/16/32, but not both at once. */
|
ASCII/UTF-8/16/32, but not both at once. */
|
||||||
#undef SUPPORT_UTF
|
#undef SUPPORT_UTF
|
||||||
|
|
||||||
/* Valgrind support to find invalid memory reads. */
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
#undef SUPPORT_VALGRIND
|
#undef SUPPORT_VALGRIND
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
|
109
tools/pcre/config.sub
vendored
109
tools/pcre/config.sub
vendored
@ -1,24 +1,18 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Configuration validation subroutine script.
|
# Configuration validation subroutine script.
|
||||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
# Copyright 1992-2013 Free Software Foundation, Inc.
|
||||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
|
||||||
# 2011, 2012 Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
timestamp='2012-08-18'
|
timestamp='2013-10-01'
|
||||||
|
|
||||||
# This file is (in principle) common to ALL GNU software.
|
# This file is free software; you can redistribute it and/or modify it
|
||||||
# The presence of a machine in this file suggests that SOME GNU software
|
# under the terms of the GNU General Public License as published by
|
||||||
# can handle that machine. It does not imply ALL GNU software can.
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
#
|
|
||||||
# This file is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful, but
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# GNU General Public License for more details.
|
# General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||||
@ -26,11 +20,12 @@ timestamp='2012-08-18'
|
|||||||
# As a special exception to the GNU General Public License, if you
|
# As a special exception to the GNU General Public License, if you
|
||||||
# distribute this file as part of a program that contains a
|
# distribute this file as part of a program that contains a
|
||||||
# configuration script generated by Autoconf, you may include it under
|
# configuration script generated by Autoconf, you may include it under
|
||||||
# the same distribution terms that you use for the rest of that program.
|
# the same distribution terms that you use for the rest of that
|
||||||
|
# program. This Exception is an additional permission under section 7
|
||||||
|
# of the GNU General Public License, version 3 ("GPLv3").
|
||||||
|
|
||||||
|
|
||||||
# Please send patches to <config-patches@gnu.org>. Submit a context
|
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
|
||||||
# diff and a properly formatted GNU ChangeLog entry.
|
|
||||||
#
|
#
|
||||||
# Configuration subroutine to validate and canonicalize a configuration type.
|
# Configuration subroutine to validate and canonicalize a configuration type.
|
||||||
# Supply the specified configuration type as an argument.
|
# Supply the specified configuration type as an argument.
|
||||||
@ -73,9 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
|
|||||||
version="\
|
version="\
|
||||||
GNU config.sub ($timestamp)
|
GNU config.sub ($timestamp)
|
||||||
|
|
||||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
Copyright 1992-2013 Free Software Foundation, Inc.
|
||||||
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
|
||||||
Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
This is free software; see the source for copying conditions. There is NO
|
This is free software; see the source for copying conditions. There is NO
|
||||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||||
@ -156,7 +149,7 @@ case $os in
|
|||||||
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
|
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
|
||||||
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
|
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
|
||||||
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
|
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
|
||||||
-apple | -axis | -knuth | -cray | -microblaze)
|
-apple | -axis | -knuth | -cray | -microblaze*)
|
||||||
os=
|
os=
|
||||||
basic_machine=$1
|
basic_machine=$1
|
||||||
;;
|
;;
|
||||||
@ -259,21 +252,24 @@ case $basic_machine in
|
|||||||
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
|
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
|
||||||
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
|
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
|
||||||
| am33_2.0 \
|
| am33_2.0 \
|
||||||
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
|
| arc | arceb \
|
||||||
|
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
|
||||||
|
| avr | avr32 \
|
||||||
| be32 | be64 \
|
| be32 | be64 \
|
||||||
| bfin \
|
| bfin \
|
||||||
| c4x | clipper \
|
| c4x | c8051 | clipper \
|
||||||
| d10v | d30v | dlx | dsp16xx | dvp \
|
| d10v | d30v | dlx | dsp16xx \
|
||||||
| epiphany \
|
| epiphany \
|
||||||
| fido | fr30 | frv \
|
| fido | fr30 | frv \
|
||||||
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
|
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
|
||||||
| hexagon \
|
| hexagon \
|
||||||
| i370 | i860 | i960 | ia64 \
|
| i370 | i860 | i960 | ia64 \
|
||||||
| ip2k | iq2000 \
|
| ip2k | iq2000 \
|
||||||
|
| k1om \
|
||||||
| le32 | le64 \
|
| le32 | le64 \
|
||||||
| lm32 \
|
| lm32 \
|
||||||
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
||||||
| maxq | mb | microblaze | mcore | mep | metag \
|
| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
|
||||||
| mips | mipsbe | mipseb | mipsel | mipsle \
|
| mips | mipsbe | mipseb | mipsel | mipsle \
|
||||||
| mips16 \
|
| mips16 \
|
||||||
| mips64 | mips64el \
|
| mips64 | mips64el \
|
||||||
@ -291,16 +287,17 @@ case $basic_machine in
|
|||||||
| mipsisa64r2 | mipsisa64r2el \
|
| mipsisa64r2 | mipsisa64r2el \
|
||||||
| mipsisa64sb1 | mipsisa64sb1el \
|
| mipsisa64sb1 | mipsisa64sb1el \
|
||||||
| mipsisa64sr71k | mipsisa64sr71kel \
|
| mipsisa64sr71k | mipsisa64sr71kel \
|
||||||
|
| mipsr5900 | mipsr5900el \
|
||||||
| mipstx39 | mipstx39el \
|
| mipstx39 | mipstx39el \
|
||||||
| mn10200 | mn10300 \
|
| mn10200 | mn10300 \
|
||||||
| moxie \
|
| moxie \
|
||||||
| mt \
|
| mt \
|
||||||
| msp430 \
|
| msp430 \
|
||||||
| nds32 | nds32le | nds32be \
|
| nds32 | nds32le | nds32be \
|
||||||
| nios | nios2 \
|
| nios | nios2 | nios2eb | nios2el \
|
||||||
| ns16k | ns32k \
|
| ns16k | ns32k \
|
||||||
| open8 \
|
| open8 \
|
||||||
| or32 \
|
| or1k | or32 \
|
||||||
| pdp10 | pdp11 | pj | pjl \
|
| pdp10 | pdp11 | pj | pjl \
|
||||||
| powerpc | powerpc64 | powerpc64le | powerpcle \
|
| powerpc | powerpc64 | powerpc64le | powerpcle \
|
||||||
| pyramid \
|
| pyramid \
|
||||||
@ -328,7 +325,7 @@ case $basic_machine in
|
|||||||
c6x)
|
c6x)
|
||||||
basic_machine=tic6x-unknown
|
basic_machine=tic6x-unknown
|
||||||
;;
|
;;
|
||||||
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
|
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
|
||||||
basic_machine=$basic_machine-unknown
|
basic_machine=$basic_machine-unknown
|
||||||
os=-none
|
os=-none
|
||||||
;;
|
;;
|
||||||
@ -370,13 +367,13 @@ case $basic_machine in
|
|||||||
| aarch64-* | aarch64_be-* \
|
| aarch64-* | aarch64_be-* \
|
||||||
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
|
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
|
||||||
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
|
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
|
||||||
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
|
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
|
||||||
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
|
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
|
||||||
| avr-* | avr32-* \
|
| avr-* | avr32-* \
|
||||||
| be32-* | be64-* \
|
| be32-* | be64-* \
|
||||||
| bfin-* | bs2000-* \
|
| bfin-* | bs2000-* \
|
||||||
| c[123]* | c30-* | [cjt]90-* | c4x-* \
|
| c[123]* | c30-* | [cjt]90-* | c4x-* \
|
||||||
| clipper-* | craynv-* | cydra-* \
|
| c8051-* | clipper-* | craynv-* | cydra-* \
|
||||||
| d10v-* | d30v-* | dlx-* \
|
| d10v-* | d30v-* | dlx-* \
|
||||||
| elxsi-* \
|
| elxsi-* \
|
||||||
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
|
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
|
||||||
@ -385,11 +382,13 @@ case $basic_machine in
|
|||||||
| hexagon-* \
|
| hexagon-* \
|
||||||
| i*86-* | i860-* | i960-* | ia64-* \
|
| i*86-* | i860-* | i960-* | ia64-* \
|
||||||
| ip2k-* | iq2000-* \
|
| ip2k-* | iq2000-* \
|
||||||
|
| k1om-* \
|
||||||
| le32-* | le64-* \
|
| le32-* | le64-* \
|
||||||
| lm32-* \
|
| lm32-* \
|
||||||
| m32c-* | m32r-* | m32rle-* \
|
| m32c-* | m32r-* | m32rle-* \
|
||||||
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
||||||
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
|
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
|
||||||
|
| microblaze-* | microblazeel-* \
|
||||||
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
|
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
|
||||||
| mips16-* \
|
| mips16-* \
|
||||||
| mips64-* | mips64el-* \
|
| mips64-* | mips64el-* \
|
||||||
@ -407,12 +406,13 @@ case $basic_machine in
|
|||||||
| mipsisa64r2-* | mipsisa64r2el-* \
|
| mipsisa64r2-* | mipsisa64r2el-* \
|
||||||
| mipsisa64sb1-* | mipsisa64sb1el-* \
|
| mipsisa64sb1-* | mipsisa64sb1el-* \
|
||||||
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
|
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
|
||||||
|
| mipsr5900-* | mipsr5900el-* \
|
||||||
| mipstx39-* | mipstx39el-* \
|
| mipstx39-* | mipstx39el-* \
|
||||||
| mmix-* \
|
| mmix-* \
|
||||||
| mt-* \
|
| mt-* \
|
||||||
| msp430-* \
|
| msp430-* \
|
||||||
| nds32-* | nds32le-* | nds32be-* \
|
| nds32-* | nds32le-* | nds32be-* \
|
||||||
| nios-* | nios2-* \
|
| nios-* | nios2-* | nios2eb-* | nios2el-* \
|
||||||
| none-* | np1-* | ns16k-* | ns32k-* \
|
| none-* | np1-* | ns16k-* | ns32k-* \
|
||||||
| open8-* \
|
| open8-* \
|
||||||
| orion-* \
|
| orion-* \
|
||||||
@ -788,7 +788,7 @@ case $basic_machine in
|
|||||||
basic_machine=ns32k-utek
|
basic_machine=ns32k-utek
|
||||||
os=-sysv
|
os=-sysv
|
||||||
;;
|
;;
|
||||||
microblaze)
|
microblaze*)
|
||||||
basic_machine=microblaze-xilinx
|
basic_machine=microblaze-xilinx
|
||||||
;;
|
;;
|
||||||
mingw64)
|
mingw64)
|
||||||
@ -796,7 +796,7 @@ case $basic_machine in
|
|||||||
os=-mingw64
|
os=-mingw64
|
||||||
;;
|
;;
|
||||||
mingw32)
|
mingw32)
|
||||||
basic_machine=i386-pc
|
basic_machine=i686-pc
|
||||||
os=-mingw32
|
os=-mingw32
|
||||||
;;
|
;;
|
||||||
mingw32ce)
|
mingw32ce)
|
||||||
@ -810,24 +810,6 @@ case $basic_machine in
|
|||||||
basic_machine=m68k-atari
|
basic_machine=m68k-atari
|
||||||
os=-mint
|
os=-mint
|
||||||
;;
|
;;
|
||||||
mipsEE* | ee | ps2)
|
|
||||||
basic_machine=mips64r5900el-scei
|
|
||||||
case $os in
|
|
||||||
-linux*)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
os=-elf
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
;;
|
|
||||||
iop)
|
|
||||||
basic_machine=mipsel-scei
|
|
||||||
os=-irx
|
|
||||||
;;
|
|
||||||
dvp)
|
|
||||||
basic_machine=dvp-scei
|
|
||||||
os=-elf
|
|
||||||
;;
|
|
||||||
mips3*-*)
|
mips3*-*)
|
||||||
basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
|
basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
|
||||||
;;
|
;;
|
||||||
@ -850,7 +832,7 @@ case $basic_machine in
|
|||||||
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
|
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
|
||||||
;;
|
;;
|
||||||
msys)
|
msys)
|
||||||
basic_machine=i386-pc
|
basic_machine=i686-pc
|
||||||
os=-msys
|
os=-msys
|
||||||
;;
|
;;
|
||||||
mvs)
|
mvs)
|
||||||
@ -1041,7 +1023,11 @@ case $basic_machine in
|
|||||||
basic_machine=i586-unknown
|
basic_machine=i586-unknown
|
||||||
os=-pw32
|
os=-pw32
|
||||||
;;
|
;;
|
||||||
rdos)
|
rdos | rdos64)
|
||||||
|
basic_machine=x86_64-pc
|
||||||
|
os=-rdos
|
||||||
|
;;
|
||||||
|
rdos32)
|
||||||
basic_machine=i386-pc
|
basic_machine=i386-pc
|
||||||
os=-rdos
|
os=-rdos
|
||||||
;;
|
;;
|
||||||
@ -1368,7 +1354,7 @@ case $os in
|
|||||||
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
|
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
|
||||||
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
|
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
|
||||||
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
|
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
|
||||||
| -sym* | -kopensolaris* \
|
| -sym* | -kopensolaris* | -plan9* \
|
||||||
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
|
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
|
||||||
| -aos* | -aros* \
|
| -aos* | -aros* \
|
||||||
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
|
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
|
||||||
@ -1386,7 +1372,7 @@ case $os in
|
|||||||
| -uxpv* | -beos* | -mpeix* | -udk* \
|
| -uxpv* | -beos* | -mpeix* | -udk* \
|
||||||
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
|
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
|
||||||
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
|
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
|
||||||
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
|
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
|
||||||
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
|
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
|
||||||
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
|
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
|
||||||
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
|
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
|
||||||
@ -1514,9 +1500,6 @@ case $os in
|
|||||||
-aros*)
|
-aros*)
|
||||||
os=-aros
|
os=-aros
|
||||||
;;
|
;;
|
||||||
-kaos*)
|
|
||||||
os=-kaos
|
|
||||||
;;
|
|
||||||
-zvmoe)
|
-zvmoe)
|
||||||
os=-zvmoe
|
os=-zvmoe
|
||||||
;;
|
;;
|
||||||
@ -1565,6 +1548,9 @@ case $basic_machine in
|
|||||||
c4x-* | tic4x-*)
|
c4x-* | tic4x-*)
|
||||||
os=-coff
|
os=-coff
|
||||||
;;
|
;;
|
||||||
|
c8051-*)
|
||||||
|
os=-elf
|
||||||
|
;;
|
||||||
hexagon-*)
|
hexagon-*)
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
@ -1608,6 +1594,9 @@ case $basic_machine in
|
|||||||
mips*-*)
|
mips*-*)
|
||||||
os=-elf
|
os=-elf
|
||||||
;;
|
;;
|
||||||
|
or1k-*)
|
||||||
|
os=-elf
|
||||||
|
;;
|
||||||
or32-*)
|
or32-*)
|
||||||
os=-coff
|
os=-coff
|
||||||
;;
|
;;
|
||||||
|
2074
tools/pcre/configure
vendored
2074
tools/pcre/configure
vendored
File diff suppressed because it is too large
Load Diff
@ -9,18 +9,18 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
|||||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
m4_define(pcre_major, [8])
|
m4_define(pcre_major, [8])
|
||||||
m4_define(pcre_minor, [32])
|
m4_define(pcre_minor, [35])
|
||||||
m4_define(pcre_prerelease, [])
|
m4_define(pcre_prerelease, [])
|
||||||
m4_define(pcre_date, [2012-11-30])
|
m4_define(pcre_date, [2014-04-04])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre_version, [3:0:2])
|
m4_define(libpcre_version, [3:3:2])
|
||||||
m4_define(libpcre16_version, [2:0:2])
|
m4_define(libpcre16_version, [2:3:2])
|
||||||
m4_define(libpcre32_version, [0:0:0])
|
m4_define(libpcre32_version, [0:3:0])
|
||||||
m4_define(libpcreposix_version, [0:1:0])
|
m4_define(libpcreposix_version, [0:2:0])
|
||||||
m4_define(libpcrecpp_version, [0:0:0])
|
m4_define(libpcrecpp_version, [0:0:0])
|
||||||
|
|
||||||
AC_PREREQ(2.57)
|
AC_PREREQ(2.57)
|
||||||
@ -30,6 +30,9 @@ AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
|||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
AC_CONFIG_HEADERS(config.h)
|
AC_CONFIG_HEADERS(config.h)
|
||||||
|
|
||||||
|
# This is a new thing required to stop a warning from automake 1.12
|
||||||
|
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||||
|
|
||||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
|
|
||||||
@ -245,7 +248,7 @@ AC_ARG_ENABLE(pcregrep-libbz2,
|
|||||||
# Handle --with-pcregrep-bufsize=N
|
# Handle --with-pcregrep-bufsize=N
|
||||||
AC_ARG_WITH(pcregrep-bufsize,
|
AC_ARG_WITH(pcregrep-bufsize,
|
||||||
AS_HELP_STRING([--with-pcregrep-bufsize=N],
|
AS_HELP_STRING([--with-pcregrep-bufsize=N],
|
||||||
[pcregrep buffer size (default=20480)]),
|
[pcregrep buffer size (default=20480, minimum=8192)]),
|
||||||
, with_pcregrep_bufsize=20480)
|
, with_pcregrep_bufsize=20480)
|
||||||
|
|
||||||
# Handle --enable-pcretest-libedit
|
# Handle --enable-pcretest-libedit
|
||||||
@ -272,6 +275,12 @@ AC_ARG_WITH(link-size,
|
|||||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||||
, with_link_size=2)
|
, with_link_size=2)
|
||||||
|
|
||||||
|
# Handle --with-parens-nest-limit=N
|
||||||
|
AC_ARG_WITH(parens-nest-limit,
|
||||||
|
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||||
|
[nested parentheses limit (default=250)]),
|
||||||
|
, with_parens_nest_limit=250)
|
||||||
|
|
||||||
# Handle --with-match-limit=N
|
# Handle --with-match-limit=N
|
||||||
AC_ARG_WITH(match-limit,
|
AC_ARG_WITH(match-limit,
|
||||||
AS_HELP_STRING([--with-match-limit=N],
|
AS_HELP_STRING([--with-match-limit=N],
|
||||||
@ -427,24 +436,33 @@ AH_TOP([
|
|||||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
systems.
|
systems.
|
||||||
|
|
||||||
In environments that support the facilities, config.h.in is converted by
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||||
should copy the distributed config.h.generic to config.h, and then edit the
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
macro definitions to be the way you need them. You must then add
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
at the start of every source.
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
|
||||||
them both to 0; an emulation function will be used. */])
|
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||||
|
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */])
|
||||||
|
|
||||||
# Checks for header files.
|
# Checks for header files.
|
||||||
AC_HEADER_STDC
|
AC_HEADER_STDC
|
||||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
|
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||||
|
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||||
|
|
||||||
# The files below are C++ header files.
|
# The files below are C++ header files.
|
||||||
pcre_have_type_traits="0"
|
pcre_have_type_traits="0"
|
||||||
@ -669,11 +687,15 @@ if test "$enable_pcre32" = "yes"; then
|
|||||||
Define to any value to enable the 32 bit PCRE library.])
|
Define to any value to enable the 32 bit PCRE library.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Unless running under Windows, JIT support requires pthreads.
|
||||||
|
|
||||||
if test "$enable_jit" = "yes"; then
|
if test "$enable_jit" = "yes"; then
|
||||||
|
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||||
CC="$PTHREAD_CC"
|
CC="$PTHREAD_CC"
|
||||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||||
LIBS="$PTHREAD_LIBS $LIBS"
|
LIBS="$PTHREAD_LIBS $LIBS"
|
||||||
|
fi
|
||||||
AC_DEFINE([SUPPORT_JIT], [], [
|
AC_DEFINE([SUPPORT_JIT], [], [
|
||||||
Define to any value to enable support for Just-In-Time compiling.])
|
Define to any value to enable support for Just-In-Time compiling.])
|
||||||
else
|
else
|
||||||
@ -722,7 +744,12 @@ if test "$enable_pcregrep_libbz2" = "yes"; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if test $with_pcregrep_bufsize -lt 8192 ; then
|
if test $with_pcregrep_bufsize -lt 8192 ; then
|
||||||
|
AC_MSG_WARN([$with_pcregrep_bufsize is too small for --with-pcregrep-bufsize; using 8192])
|
||||||
with_pcregrep_bufsize="8192"
|
with_pcregrep_bufsize="8192"
|
||||||
|
else
|
||||||
|
if test $? -gt 1 ; then
|
||||||
|
AC_MSG_ERROR([Bad value for --with-pcregrep-bufsize])
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
|
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
|
||||||
@ -773,6 +800,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
|||||||
faster than using malloc() for each call. The threshold above which
|
faster than using malloc() for each call. The threshold above which
|
||||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||||
|
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern.])
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||||
The value of MATCH_LIMIT determines the default number of times the
|
The value of MATCH_LIMIT determines the default number of times the
|
||||||
internal match() function can be called during a single execution of
|
internal match() function can be called during a single execution of
|
||||||
@ -838,7 +870,7 @@ fi
|
|||||||
|
|
||||||
if test "$enable_valgrind" = "yes"; then
|
if test "$enable_valgrind" = "yes"; then
|
||||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||||
Valgrind support to find invalid memory reads.])
|
Define to any value for valgrind support to find invalid memory reads.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Platform specific issues
|
# Platform specific issues
|
||||||
@ -946,7 +978,7 @@ if test "$enable_pcretest_libreadline" = "yes"; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check for valgrind
|
# Handle valgrind support
|
||||||
|
|
||||||
if test "$enable_valgrind" = "yes"; then
|
if test "$enable_valgrind" = "yes"; then
|
||||||
m4_ifdef([PKG_CHECK_MODULES],
|
m4_ifdef([PKG_CHECK_MODULES],
|
||||||
@ -954,7 +986,7 @@ if test "$enable_valgrind" = "yes"; then
|
|||||||
[AC_MSG_ERROR([pkg-config not supported])])
|
[AC_MSG_ERROR([pkg-config not supported])])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# test code coverage reporting
|
# Handle code coverage reporting support
|
||||||
if test "$enable_coverage" = "yes"; then
|
if test "$enable_coverage" = "yes"; then
|
||||||
if test "x$GCC" != "xyes"; then
|
if test "x$GCC" != "xyes"; then
|
||||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||||
@ -985,11 +1017,7 @@ if test "$enable_coverage" = "yes"; then
|
|||||||
AC_MSG_ERROR([genhtml not found])
|
AC_MSG_ERROR([genhtml not found])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE([SUPPORT_GCOV],[1], [
|
# Set flags needed for gcov
|
||||||
Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
|
||||||
are able to generate code coverage reports.])
|
|
||||||
|
|
||||||
# And add flags needed for gcov
|
|
||||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
GCOV_LIBS="-lgcov"
|
GCOV_LIBS="-lgcov"
|
||||||
@ -1064,6 +1092,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||||
Internal link size .............. : ${with_link_size}
|
Internal link size .............. : ${with_link_size}
|
||||||
|
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||||
Match limit ..................... : ${with_match_limit}
|
Match limit ..................... : ${with_match_limit}
|
||||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||||
Build shared libs ............... : ${enable_shared}
|
Build shared libs ............... : ${enable_shared}
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# depcomp - compile a program generating dependencies as side-effects
|
# depcomp - compile a program generating dependencies as side-effects
|
||||||
|
|
||||||
scriptversion=2012-03-27.16; # UTC
|
scriptversion=2013-05-30.07; # UTC
|
||||||
|
|
||||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
|
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||||
# 2011, 2012 Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -57,11 +56,65 @@ EOF
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
# Get the directory component of the given path, and save it in the
|
||||||
|
# global variable '$dir'. Note that this directory component will
|
||||||
|
# be either empty or ending with a '/' character. This is deliberate.
|
||||||
|
set_dir_from ()
|
||||||
|
{
|
||||||
|
case $1 in
|
||||||
|
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
|
||||||
|
*) dir=;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get the suffix-stripped basename of the given path, and save it in the
|
||||||
|
# global variable '$base'.
|
||||||
|
set_base_from ()
|
||||||
|
{
|
||||||
|
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
|
||||||
|
}
|
||||||
|
|
||||||
|
# If no dependency file was actually created by the compiler invocation,
|
||||||
|
# we still have to create a dummy depfile, to avoid errors with the
|
||||||
|
# Makefile "include basename.Plo" scheme.
|
||||||
|
make_dummy_depfile ()
|
||||||
|
{
|
||||||
|
echo "#dummy" > "$depfile"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Factor out some common post-processing of the generated depfile.
|
||||||
|
# Requires the auxiliary global variable '$tmpdepfile' to be set.
|
||||||
|
aix_post_process_depfile ()
|
||||||
|
{
|
||||||
|
# If the compiler actually managed to produce a dependency file,
|
||||||
|
# post-process it.
|
||||||
|
if test -f "$tmpdepfile"; then
|
||||||
|
# Each line is of the form 'foo.o: dependency.h'.
|
||||||
|
# Do two passes, one to just change these to
|
||||||
|
# $object: dependency.h
|
||||||
|
# and one to simply output
|
||||||
|
# dependency.h:
|
||||||
|
# which is needed to avoid the deleted-header problem.
|
||||||
|
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
|
||||||
|
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
|
||||||
|
} > "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
else
|
||||||
|
make_dummy_depfile
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
# A tabulation character.
|
# A tabulation character.
|
||||||
tab=' '
|
tab=' '
|
||||||
# A newline character.
|
# A newline character.
|
||||||
nl='
|
nl='
|
||||||
'
|
'
|
||||||
|
# Character ranges might be problematic outside the C locale.
|
||||||
|
# These definitions help.
|
||||||
|
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||||
|
lower=abcdefghijklmnopqrstuvwxyz
|
||||||
|
digits=0123456789
|
||||||
|
alpha=${upper}${lower}
|
||||||
|
|
||||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||||
@ -75,6 +128,9 @@ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
|||||||
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
|
|
||||||
|
# Avoid interferences from the environment.
|
||||||
|
gccflag= dashmflag=
|
||||||
|
|
||||||
# Some modes work just like other modes, but use different flags. We
|
# Some modes work just like other modes, but use different flags. We
|
||||||
# parameterize here, but still list the modes in the big case below,
|
# parameterize here, but still list the modes in the big case below,
|
||||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||||
@ -109,7 +165,7 @@ if test "$depmode" = msvc7msys; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$depmode" = xlc; then
|
if test "$depmode" = xlc; then
|
||||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations.
|
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||||
gccflag=-qmakedep=gcc,-MF
|
gccflag=-qmakedep=gcc,-MF
|
||||||
depmode=gcc
|
depmode=gcc
|
||||||
fi
|
fi
|
||||||
@ -134,8 +190,7 @@ gcc3)
|
|||||||
done
|
done
|
||||||
"$@"
|
"$@"
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
@ -143,13 +198,17 @@ gcc3)
|
|||||||
;;
|
;;
|
||||||
|
|
||||||
gcc)
|
gcc)
|
||||||
|
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
|
||||||
|
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
|
||||||
|
## (see the conditional assignment to $gccflag above).
|
||||||
## There are various ways to get dependency output from gcc. Here's
|
## There are various ways to get dependency output from gcc. Here's
|
||||||
## why we pick this rather obscure method:
|
## why we pick this rather obscure method:
|
||||||
## - Don't want to use -MD because we'd like the dependencies to end
|
## - Don't want to use -MD because we'd like the dependencies to end
|
||||||
## up in a subdir. Having to rename by hand is ugly.
|
## up in a subdir. Having to rename by hand is ugly.
|
||||||
## (We might end up doing this anyway to support other compilers.)
|
## (We might end up doing this anyway to support other compilers.)
|
||||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||||
## -MM, not -M (despite what the docs say).
|
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||||
|
## supported by the other compilers which use the 'gcc' depmode.
|
||||||
## - Using -M directly means running the compiler twice (even worse
|
## - Using -M directly means running the compiler twice (even worse
|
||||||
## than renaming).
|
## than renaming).
|
||||||
if test -z "$gccflag"; then
|
if test -z "$gccflag"; then
|
||||||
@ -157,15 +216,14 @@ gcc)
|
|||||||
fi
|
fi
|
||||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
echo "$object : \\" > "$depfile"
|
echo "$object : \\" > "$depfile"
|
||||||
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
# The second -e expression handles DOS-style file names with drive
|
||||||
## The second -e expression handles DOS-style file names with drive letters.
|
# letters.
|
||||||
sed -e 's/^[^:]*: / /' \
|
sed -e 's/^[^:]*: / /' \
|
||||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||||
## This next piece of magic avoids the "deleted header file" problem.
|
## This next piece of magic avoids the "deleted header file" problem.
|
||||||
@ -174,14 +232,14 @@ gcc)
|
|||||||
## typically no way to rebuild the header). We avoid this by adding
|
## typically no way to rebuild the header). We avoid this by adding
|
||||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||||
## this for us directly.
|
## this for us directly.
|
||||||
tr ' ' "$nl" < "$tmpdepfile" |
|
|
||||||
## Some versions of gcc put a space before the ':'. On the theory
|
## Some versions of gcc put a space before the ':'. On the theory
|
||||||
## that the space means something, we add a space to the output as
|
## that the space means something, we add a space to the output as
|
||||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||||
## to the object. Take care to not repeat it in the output.
|
## to the object. Take care to not repeat it in the output.
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
|
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||||
| sed -e 's/$/ :/' >> "$depfile"
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
@ -200,8 +258,7 @@ sgi)
|
|||||||
"$@" -MDupdate "$tmpdepfile"
|
"$@" -MDupdate "$tmpdepfile"
|
||||||
fi
|
fi
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
@ -209,7 +266,6 @@ sgi)
|
|||||||
|
|
||||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||||
echo "$object : \\" > "$depfile"
|
echo "$object : \\" > "$depfile"
|
||||||
|
|
||||||
# Clip off the initial element (the dependent). Don't try to be
|
# Clip off the initial element (the dependent). Don't try to be
|
||||||
# clever and replace this with sed code, as IRIX sed won't handle
|
# clever and replace this with sed code, as IRIX sed won't handle
|
||||||
# lines with more than a fixed number of characters (4096 in
|
# lines with more than a fixed number of characters (4096 in
|
||||||
@ -217,19 +273,15 @@ sgi)
|
|||||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||||
# dependency line.
|
# dependency line.
|
||||||
tr ' ' "$nl" < "$tmpdepfile" \
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||||
tr "$nl" ' ' >> "$depfile"
|
| tr "$nl" ' ' >> "$depfile"
|
||||||
echo >> "$depfile"
|
echo >> "$depfile"
|
||||||
|
|
||||||
# The second pass generates a dummy entry for each header file.
|
# The second pass generates a dummy entry for each header file.
|
||||||
tr ' ' "$nl" < "$tmpdepfile" \
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||||
>> "$depfile"
|
>> "$depfile"
|
||||||
else
|
else
|
||||||
# The sourcefile does not contain any dependencies, so just
|
make_dummy_depfile
|
||||||
# store a dummy comment line, to avoid errors with the Makefile
|
|
||||||
# "include basename.Plo" scheme.
|
|
||||||
echo "#dummy" > "$depfile"
|
|
||||||
fi
|
fi
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
@ -247,9 +299,8 @@ aix)
|
|||||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||||
# start of each line; $object doesn't have directory information.
|
# start of each line; $object doesn't have directory information.
|
||||||
# Version 6 uses the directory in both cases.
|
# Version 6 uses the directory in both cases.
|
||||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
set_dir_from "$object"
|
||||||
test "x$dir" = "x$object" && dir=
|
set_base_from "$object"
|
||||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
tmpdepfile1=$dir$base.u
|
tmpdepfile1=$dir$base.u
|
||||||
tmpdepfile2=$base.u
|
tmpdepfile2=$base.u
|
||||||
@ -262,9 +313,7 @@ aix)
|
|||||||
"$@" -M
|
"$@" -M
|
||||||
fi
|
fi
|
||||||
stat=$?
|
stat=$?
|
||||||
|
if test $stat -ne 0; then
|
||||||
if test $stat -eq 0; then :
|
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
@ -273,65 +322,113 @@ aix)
|
|||||||
do
|
do
|
||||||
test -f "$tmpdepfile" && break
|
test -f "$tmpdepfile" && break
|
||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
aix_post_process_depfile
|
||||||
# Each line is of the form 'foo.o: dependent.h'.
|
;;
|
||||||
# Do two passes, one to just change these to
|
|
||||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
tcc)
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
|
||||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
# FIXME: That version is still under development at the moment of writing.
|
||||||
else
|
# Make sure that this statement remains true also for stable, released
|
||||||
# The sourcefile does not contain any dependencies, so just
|
# versions.
|
||||||
# store a dummy comment line, to avoid errors with the Makefile
|
# It will wrap lines (doesn't matter whether long or short) with a
|
||||||
# "include basename.Plo" scheme.
|
# trailing '\', as in:
|
||||||
echo "#dummy" > "$depfile"
|
#
|
||||||
|
# foo.o : \
|
||||||
|
# foo.c \
|
||||||
|
# foo.h \
|
||||||
|
#
|
||||||
|
# It will put a trailing '\' even on the last line, and will use leading
|
||||||
|
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||||
|
# "Emit spaces for -MD").
|
||||||
|
"$@" -MD -MF "$tmpdepfile"
|
||||||
|
stat=$?
|
||||||
|
if test $stat -ne 0; then
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
fi
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||||
|
# We have to change lines of the first kind to '$object: \'.
|
||||||
|
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||||
|
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||||
|
# dummy dependency, to avoid the deleted-header problem.
|
||||||
|
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
icc)
|
## The order of this option in the case statement is important, since the
|
||||||
# Intel's C compiler anf tcc (Tiny C Compiler) understand '-MD -MF file'.
|
## shell code in configure will try each of these formats in the order
|
||||||
# However on
|
## listed in this file. A plain '-MD' option would be understood by many
|
||||||
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||||
# ICC 7.0 will fill foo.d with something like
|
pgcc)
|
||||||
# foo.o: sub/foo.c
|
# Portland's C compiler understands '-MD'.
|
||||||
# foo.o: sub/foo.h
|
# Will always output deps to 'file.d' where file is the root name of the
|
||||||
# which is wrong. We want
|
# source file under compilation, even if file resides in a subdirectory.
|
||||||
# sub/foo.o: sub/foo.c
|
# The object file name does not affect the name of the '.d' file.
|
||||||
# sub/foo.o: sub/foo.h
|
# pgcc 10.2 will output
|
||||||
# sub/foo.c:
|
|
||||||
# sub/foo.h:
|
|
||||||
# ICC 7.1 will output
|
|
||||||
# foo.o: sub/foo.c sub/foo.h
|
# foo.o: sub/foo.c sub/foo.h
|
||||||
# and will wrap long lines using '\' :
|
# and will wrap long lines using '\' :
|
||||||
# foo.o: sub/foo.c ... \
|
# foo.o: sub/foo.c ... \
|
||||||
# sub/foo.h ... \
|
# sub/foo.h ... \
|
||||||
# ...
|
# ...
|
||||||
# tcc 0.9.26 (FIXME still under development at the moment of writing)
|
set_dir_from "$object"
|
||||||
# will emit a similar output, but also prepend the continuation lines
|
# Use the source, not the object, to determine the base name, since
|
||||||
# with horizontal tabulation characters.
|
# that's sadly what pgcc will do too.
|
||||||
"$@" -MD -MF "$tmpdepfile"
|
set_base_from "$source"
|
||||||
|
tmpdepfile=$base.d
|
||||||
|
|
||||||
|
# For projects that build the same source file twice into different object
|
||||||
|
# files, the pgcc approach of using the *source* file root name can cause
|
||||||
|
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||||
|
# the same $tmpdepfile.
|
||||||
|
lockdir=$base.d-lock
|
||||||
|
trap "
|
||||||
|
echo '$0: caught signal, cleaning up...' >&2
|
||||||
|
rmdir '$lockdir'
|
||||||
|
exit 1
|
||||||
|
" 1 2 13 15
|
||||||
|
numtries=100
|
||||||
|
i=$numtries
|
||||||
|
while test $i -gt 0; do
|
||||||
|
# mkdir is a portable test-and-set.
|
||||||
|
if mkdir "$lockdir" 2>/dev/null; then
|
||||||
|
# This process acquired the lock.
|
||||||
|
"$@" -MD
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
# Release the lock.
|
||||||
|
rmdir "$lockdir"
|
||||||
|
break
|
||||||
else
|
else
|
||||||
|
# If the lock is being held by a different process, wait
|
||||||
|
# until the winning process is done or we timeout.
|
||||||
|
while test -d "$lockdir" && test $i -gt 0; do
|
||||||
|
sleep 1
|
||||||
|
i=`expr $i - 1`
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
i=`expr $i - 1`
|
||||||
|
done
|
||||||
|
trap - 1 2 13 15
|
||||||
|
if test $i -le 0; then
|
||||||
|
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||||
|
echo "$0: check lockdir '$lockdir'" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $stat -ne 0; then
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
# Each line is of the form 'foo.o: dependent.h',
|
# Each line is of the form `foo.o: dependent.h',
|
||||||
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
|
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||||
# Do two passes, one to just change these to
|
# Do two passes, one to just change these to
|
||||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||||
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
|
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||||
< "$tmpdepfile" > "$depfile"
|
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
sed '
|
# correctly. Breaking it into two sed invocations is a workaround.
|
||||||
s/[ '"$tab"'][ '"$tab"']*/ /g
|
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||||
s/^ *//
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
s/ *\\*$//
|
|
||||||
s/^[^:]*: *//
|
|
||||||
/^$/d
|
|
||||||
/:$/d
|
|
||||||
s/$/ :/
|
|
||||||
' < "$tmpdepfile" >> "$depfile"
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -342,9 +439,8 @@ hp2)
|
|||||||
# 'foo.d', which lands next to the object file, wherever that
|
# 'foo.d', which lands next to the object file, wherever that
|
||||||
# happens to be.
|
# happens to be.
|
||||||
# Much of this is similar to the tru64 case; see comments there.
|
# Much of this is similar to the tru64 case; see comments there.
|
||||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
set_dir_from "$object"
|
||||||
test "x$dir" = "x$object" && dir=
|
set_base_from "$object"
|
||||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
tmpdepfile1=$dir$base.d
|
tmpdepfile1=$dir$base.d
|
||||||
tmpdepfile2=$dir.libs/$base.d
|
tmpdepfile2=$dir.libs/$base.d
|
||||||
@ -355,8 +451,7 @@ hp2)
|
|||||||
"$@" +Maked
|
"$@" +Maked
|
||||||
fi
|
fi
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
@ -366,7 +461,7 @@ hp2)
|
|||||||
test -f "$tmpdepfile" && break
|
test -f "$tmpdepfile" && break
|
||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
if test -f "$tmpdepfile"; then
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||||
# Add 'dependent.h:' lines.
|
# Add 'dependent.h:' lines.
|
||||||
sed -ne '2,${
|
sed -ne '2,${
|
||||||
s/^ *//
|
s/^ *//
|
||||||
@ -375,7 +470,7 @@ hp2)
|
|||||||
p
|
p
|
||||||
}' "$tmpdepfile" >> "$depfile"
|
}' "$tmpdepfile" >> "$depfile"
|
||||||
else
|
else
|
||||||
echo "#dummy" > "$depfile"
|
make_dummy_depfile
|
||||||
fi
|
fi
|
||||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||||
;;
|
;;
|
||||||
@ -386,55 +481,40 @@ tru64)
|
|||||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||||
# dependencies in 'foo.d' instead, so we check for that too.
|
# dependencies in 'foo.d' instead, so we check for that too.
|
||||||
# Subdirectories are respected.
|
# Subdirectories are respected.
|
||||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
set_dir_from "$object"
|
||||||
test "x$dir" = "x$object" && dir=
|
set_base_from "$object"
|
||||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
|
||||||
|
|
||||||
if test "$libtool" = yes; then
|
if test "$libtool" = yes; then
|
||||||
# With Tru64 cc, shared objects can also be used to make a
|
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||||
# static library. This mechanism is used in libtool 1.4 series to
|
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||||
# handle both shared and static libraries in a single compilation.
|
|
||||||
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
|
||||||
#
|
|
||||||
# With libtool 1.5 this exception was removed, and libtool now
|
|
||||||
# generates 2 separate objects for the 2 libraries. These two
|
|
||||||
# compilations output dependencies in $dir.libs/$base.o.d and
|
|
||||||
# in $dir$base.o.d. We have to check for both files, because
|
# in $dir$base.o.d. We have to check for both files, because
|
||||||
# one of the two compilations can be disabled. We should prefer
|
# one of the two compilations can be disabled. We should prefer
|
||||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||||
# the former would cause a distcleancheck panic.
|
# the former would cause a distcleancheck panic.
|
||||||
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||||
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||||
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||||
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
|
||||||
"$@" -Wc,-MD
|
"$@" -Wc,-MD
|
||||||
else
|
else
|
||||||
tmpdepfile1=$dir$base.o.d
|
tmpdepfile1=$dir$base.d
|
||||||
tmpdepfile2=$dir$base.d
|
tmpdepfile2=$dir$base.d
|
||||||
tmpdepfile3=$dir$base.d
|
tmpdepfile3=$dir$base.d
|
||||||
tmpdepfile4=$dir$base.d
|
|
||||||
"$@" -MD
|
"$@" -MD
|
||||||
fi
|
fi
|
||||||
|
|
||||||
stat=$?
|
stat=$?
|
||||||
if test $stat -eq 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||||
do
|
do
|
||||||
test -f "$tmpdepfile" && break
|
test -f "$tmpdepfile" && break
|
||||||
done
|
done
|
||||||
if test -f "$tmpdepfile"; then
|
# Same post-processing that is required for AIX mode.
|
||||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
aix_post_process_depfile
|
||||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
|
||||||
else
|
|
||||||
echo "#dummy" > "$depfile"
|
|
||||||
fi
|
|
||||||
rm -f "$tmpdepfile"
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
msvc7)
|
msvc7)
|
||||||
@ -446,8 +526,7 @@ msvc7)
|
|||||||
"$@" $showIncludes > "$tmpdepfile"
|
"$@" $showIncludes > "$tmpdepfile"
|
||||||
stat=$?
|
stat=$?
|
||||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||||
if test "$stat" = 0; then :
|
if test $stat -ne 0; then
|
||||||
else
|
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
exit $stat
|
exit $stat
|
||||||
fi
|
fi
|
||||||
@ -473,6 +552,7 @@ $ {
|
|||||||
G
|
G
|
||||||
p
|
p
|
||||||
}' >> "$depfile"
|
}' >> "$depfile"
|
||||||
|
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -524,13 +604,14 @@ dashmstdout)
|
|||||||
# in the target name. This is to cope with DOS-style filenames:
|
# in the target name. This is to cope with DOS-style filenames:
|
||||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||||
"$@" $dashmflag |
|
"$@" $dashmflag |
|
||||||
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
|
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
cat < "$tmpdepfile" > "$depfile"
|
cat < "$tmpdepfile" > "$depfile"
|
||||||
tr ' ' "$nl" < "$tmpdepfile" | \
|
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
# correctly. Breaking it into two sed invocations is a workaround.
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||||
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
rm -f "$tmpdepfile"
|
rm -f "$tmpdepfile"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -583,10 +664,12 @@ makedepend)
|
|||||||
# makedepend may prepend the VPATH from the source file name to the object.
|
# makedepend may prepend the VPATH from the source file name to the object.
|
||||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||||
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
|
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
# correctly. Breaking it into two sed invocations is a workaround.
|
||||||
## correctly. Breaking it into two sed invocations is a workaround.
|
sed '1,2d' "$tmpdepfile" \
|
||||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
| tr ' ' "$nl" \
|
||||||
|
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||||
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||||
;;
|
;;
|
||||||
|
|
||||||
@ -622,10 +705,10 @@ cpp)
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
"$@" -E |
|
"$@" -E \
|
||||||
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||||
sed '$ s: \\$::' > "$tmpdepfile"
|
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||||
rm -f "$depfile"
|
rm -f "$depfile"
|
||||||
echo "$object : \\" > "$depfile"
|
echo "$object : \\" > "$depfile"
|
||||||
cat < "$tmpdepfile" >> "$depfile"
|
cat < "$tmpdepfile" >> "$depfile"
|
||||||
|
764
tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
764
tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
@ -0,0 +1,764 @@
|
|||||||
|
Building PCRE without using autotools
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
This document contains the following sections:
|
||||||
|
|
||||||
|
General
|
||||||
|
Generic instructions for the PCRE C library
|
||||||
|
The C++ wrapper functions
|
||||||
|
Building for virtual Pascal
|
||||||
|
Stack size in Windows environments
|
||||||
|
Linking programs in Windows environments
|
||||||
|
Calling conventions in Windows environments
|
||||||
|
Comments about Win32 builds
|
||||||
|
Building PCRE on Windows with CMake
|
||||||
|
Use of relative paths with CMake on Windows
|
||||||
|
Testing with RunTest.bat
|
||||||
|
Building under Windows CE with Visual Studio 200x
|
||||||
|
Building under Windows with BCC5.5
|
||||||
|
Building using Borland C++ Builder 2007 (CB2007) and higher
|
||||||
|
Building PCRE on OpenVMS
|
||||||
|
Building PCRE on Stratus OpenVOS
|
||||||
|
Building PCRE on native z/OS and z/VM
|
||||||
|
|
||||||
|
|
||||||
|
GENERAL
|
||||||
|
|
||||||
|
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
||||||
|
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||||
|
anything other than Linux systems are untested by me.
|
||||||
|
|
||||||
|
There are some other comments and files (including some documentation in CHM
|
||||||
|
format) in the Contrib directory on the FTP site:
|
||||||
|
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||||
|
|
||||||
|
The basic PCRE library consists entirely of code written in Standard C, and so
|
||||||
|
should compile successfully on any system that has a Standard C compiler and
|
||||||
|
library. The C++ wrapper functions are a separate issue (see below).
|
||||||
|
|
||||||
|
The PCRE distribution includes a "configure" file for use by the configure/make
|
||||||
|
(autotools) build system, as found in many Unix-like environments. The README
|
||||||
|
file contains information about the options for "configure".
|
||||||
|
|
||||||
|
There is also support for CMake, which some users prefer, especially in Windows
|
||||||
|
environments, though it can also be run in Unix-like environments. See the
|
||||||
|
section entitled "Building PCRE on Windows with CMake" below.
|
||||||
|
|
||||||
|
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||||
|
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||||
|
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
|
||||||
|
the .generic versions are not used.
|
||||||
|
|
||||||
|
|
||||||
|
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||||
|
|
||||||
|
The following are generic instructions for building the PCRE C library "by
|
||||||
|
hand". If you are going to use CMake, this section does not apply to you; you
|
||||||
|
can skip ahead to the CMake section.
|
||||||
|
|
||||||
|
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||||
|
settings that it contains to whatever is appropriate for your environment.
|
||||||
|
|
||||||
|
In particular, you can alter the definition of the NEWLINE macro to
|
||||||
|
specify what character(s) you want to be interpreted as line terminators.
|
||||||
|
In an EBCDIC environment, you MUST change NEWLINE, because its default
|
||||||
|
value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
|
||||||
|
NL), though in some cases it may be 37 (0x25).
|
||||||
|
|
||||||
|
When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
|
||||||
|
to your compiler so that config.h is included in the sources.
|
||||||
|
|
||||||
|
An alternative approach is not to edit config.h, but to use -D on the
|
||||||
|
compiler command line to make any changes that you need to the
|
||||||
|
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||||
|
|
||||||
|
NOTE: There have been occasions when the way in which certain parameters
|
||||||
|
in config.h are used has changed between releases. (In the configure/make
|
||||||
|
world, this is handled automatically.) When upgrading to a new release,
|
||||||
|
you are strongly advised to review config.h.generic before re-using what
|
||||||
|
you had previously.
|
||||||
|
|
||||||
|
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||||
|
|
||||||
|
(3) EITHER:
|
||||||
|
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||||
|
|
||||||
|
OR:
|
||||||
|
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
|
||||||
|
you have set up config.h), and then run it with the single argument
|
||||||
|
"pcre_chartables.c". This generates a set of standard character tables
|
||||||
|
and writes them to that file. The tables are generated using the default
|
||||||
|
C locale for your system. If you want to use a locale that is specified
|
||||||
|
by LC_xxx environment variables, add the -L option to the dftables
|
||||||
|
command. You must use this method if you are building on a system that
|
||||||
|
uses EBCDIC code.
|
||||||
|
|
||||||
|
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||||
|
specify alternative tables at run time.
|
||||||
|
|
||||||
|
(4) Ensure that you have the following header files:
|
||||||
|
|
||||||
|
pcre_internal.h
|
||||||
|
ucp.h
|
||||||
|
|
||||||
|
(5) For an 8-bit library, compile the following source files, setting
|
||||||
|
-DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
|
||||||
|
configuration, or else use other -D settings to change the configuration
|
||||||
|
as required.
|
||||||
|
|
||||||
|
pcre_byte_order.c
|
||||||
|
pcre_chartables.c
|
||||||
|
pcre_compile.c
|
||||||
|
pcre_config.c
|
||||||
|
pcre_dfa_exec.c
|
||||||
|
pcre_exec.c
|
||||||
|
pcre_fullinfo.c
|
||||||
|
pcre_get.c
|
||||||
|
pcre_globals.c
|
||||||
|
pcre_jit_compile.c
|
||||||
|
pcre_maketables.c
|
||||||
|
pcre_newline.c
|
||||||
|
pcre_ord2utf8.c
|
||||||
|
pcre_refcount.c
|
||||||
|
pcre_string_utils.c
|
||||||
|
pcre_study.c
|
||||||
|
pcre_tables.c
|
||||||
|
pcre_ucd.c
|
||||||
|
pcre_valid_utf8.c
|
||||||
|
pcre_version.c
|
||||||
|
pcre_xclass.c
|
||||||
|
|
||||||
|
Make sure that you include -I. in the compiler command (or equivalent for
|
||||||
|
an unusual compiler) so that all included PCRE header files are first
|
||||||
|
sought in the current directory. Otherwise you run the risk of picking up
|
||||||
|
a previously-installed file from somewhere else.
|
||||||
|
|
||||||
|
Note that you must still compile pcre_jit_compile.c, even if you have not
|
||||||
|
defined SUPPORT_JIT in config.h, because when JIT support is not
|
||||||
|
configured, dummy functions are compiled. When JIT support IS configured,
|
||||||
|
pcre_jit_compile.c #includes sources from the sljit subdirectory, where
|
||||||
|
there should be 16 files, all of whose names begin with "sljit".
|
||||||
|
|
||||||
|
(6) Now link all the compiled code into an object library in whichever form
|
||||||
|
your system keeps such libraries. This is the basic PCRE C 8-bit library.
|
||||||
|
If your system has static and shared libraries, you may have to do this
|
||||||
|
once for each type.
|
||||||
|
|
||||||
|
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||||
|
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
|
pcre16_byte_order.c
|
||||||
|
pcre16_chartables.c
|
||||||
|
pcre16_compile.c
|
||||||
|
pcre16_config.c
|
||||||
|
pcre16_dfa_exec.c
|
||||||
|
pcre16_exec.c
|
||||||
|
pcre16_fullinfo.c
|
||||||
|
pcre16_get.c
|
||||||
|
pcre16_globals.c
|
||||||
|
pcre16_jit_compile.c
|
||||||
|
pcre16_maketables.c
|
||||||
|
pcre16_newline.c
|
||||||
|
pcre16_ord2utf16.c
|
||||||
|
pcre16_refcount.c
|
||||||
|
pcre16_string_utils.c
|
||||||
|
pcre16_study.c
|
||||||
|
pcre16_tables.c
|
||||||
|
pcre16_ucd.c
|
||||||
|
pcre16_utf16_utils.c
|
||||||
|
pcre16_valid_utf16.c
|
||||||
|
pcre16_version.c
|
||||||
|
pcre16_xclass.c
|
||||||
|
|
||||||
|
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||||
|
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
|
pcre32_byte_order.c
|
||||||
|
pcre32_chartables.c
|
||||||
|
pcre32_compile.c
|
||||||
|
pcre32_config.c
|
||||||
|
pcre32_dfa_exec.c
|
||||||
|
pcre32_exec.c
|
||||||
|
pcre32_fullinfo.c
|
||||||
|
pcre32_get.c
|
||||||
|
pcre32_globals.c
|
||||||
|
pcre32_jit_compile.c
|
||||||
|
pcre32_maketables.c
|
||||||
|
pcre32_newline.c
|
||||||
|
pcre32_ord2utf32.c
|
||||||
|
pcre32_refcount.c
|
||||||
|
pcre32_string_utils.c
|
||||||
|
pcre32_study.c
|
||||||
|
pcre32_tables.c
|
||||||
|
pcre32_ucd.c
|
||||||
|
pcre32_utf32_utils.c
|
||||||
|
pcre32_valid_utf32.c
|
||||||
|
pcre32_version.c
|
||||||
|
pcre32_xclass.c
|
||||||
|
|
||||||
|
(9) If you want to build the POSIX wrapper functions (which apply only to the
|
||||||
|
8-bit library), ensure that you have the pcreposix.h file and then compile
|
||||||
|
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
||||||
|
(on its own) as the pcreposix library.
|
||||||
|
|
||||||
|
(10) The pcretest program can be linked with any combination of the 8-bit,
|
||||||
|
16-bit and 32-bit libraries (depending on what you selected in config.h).
|
||||||
|
Compile pcretest.c and pcre_printint.c (again, don't forget
|
||||||
|
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
|
||||||
|
If you compiled an 8-bit library, pcretest also needs the pcreposix
|
||||||
|
wrapper library unless you compiled it with -DNOPOSIX.
|
||||||
|
|
||||||
|
(11) Run pcretest on the testinput files in the testdata directory, and check
|
||||||
|
that the output matches the corresponding testoutput files. There are
|
||||||
|
comments about what each test does in the section entitled "Testing PCRE"
|
||||||
|
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||||
|
32-bit libraries, you need to run pcretest with the -16 option to do
|
||||||
|
16-bit tests and with the -32 option to do 32-bit tests.
|
||||||
|
|
||||||
|
Some tests are relevant only when certain build-time options are selected.
|
||||||
|
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
|
||||||
|
if you have built PCRE without it. See the comments at the start of each
|
||||||
|
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
||||||
|
will run the appropriate tests for you. The command "RunTest list" will
|
||||||
|
output a list of all the tests.
|
||||||
|
|
||||||
|
Note that the supplied files are in Unix format, with just LF characters
|
||||||
|
as line terminators. You may need to edit them to change this if your
|
||||||
|
system uses a different convention. If you are using Windows, you probably
|
||||||
|
should use the wintestinput3 file instead of testinput3 (and the
|
||||||
|
corresponding output file). This is a locale test; wintestinput3 sets the
|
||||||
|
locale to "french" rather than "fr_FR", and there are some minor output
|
||||||
|
differences.
|
||||||
|
|
||||||
|
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||||
|
by the testdata files. However, you might also like to build and run
|
||||||
|
the freestanding JIT test program, pcre_jit_test.c.
|
||||||
|
|
||||||
|
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||||
|
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
||||||
|
library).
|
||||||
|
|
||||||
|
|
||||||
|
THE C++ WRAPPER FUNCTIONS
|
||||||
|
|
||||||
|
The PCRE distribution also contains some C++ wrapper functions and tests,
|
||||||
|
applicable to the 8-bit library, which were contributed by Google Inc. On a
|
||||||
|
system that can use "configure" and "make", the functions are automatically
|
||||||
|
built into a library called pcrecpp. It should be straightforward to compile
|
||||||
|
the .cc files manually on other systems. The files called xxx_unittest.cc are
|
||||||
|
test programs for each of the corresponding xxx.cc files.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING FOR VIRTUAL PASCAL
|
||||||
|
|
||||||
|
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||||
|
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
||||||
|
additional files. The following files in the distribution are for building PCRE
|
||||||
|
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||||
|
|
||||||
|
|
||||||
|
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
The default processor stack size of 1Mb in some Windows environments is too
|
||||||
|
small for matching patterns that need much recursion. In particular, test 2 may
|
||||||
|
fail because of this. Normally, running out of stack causes a crash, but there
|
||||||
|
have been cases where the test program has just died silently. See your linker
|
||||||
|
documentation for how to increase stack size if you experience problems. The
|
||||||
|
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
||||||
|
be too small for some pattern/subject combinations.
|
||||||
|
|
||||||
|
PCRE has a compile configuration option to disable the use of stack for
|
||||||
|
recursion so that heap is used instead. However, pattern matching is
|
||||||
|
significantly slower when this is done. There is more about stack usage in the
|
||||||
|
"pcrestack" documentation.
|
||||||
|
|
||||||
|
|
||||||
|
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
If you want to statically link a program against a PCRE library in the form of
|
||||||
|
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
|
||||||
|
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
|
||||||
|
be declared __declspec(dllimport), with unwanted results.
|
||||||
|
|
||||||
|
|
||||||
|
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||||
|
|
||||||
|
It is possible to compile programs to use different calling conventions using
|
||||||
|
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||||
|
easier to change the calling convention for the exported functions in the
|
||||||
|
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
|
||||||
|
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||||
|
not set, it defaults to empty; the default calling convention is then used
|
||||||
|
(which is what is wanted most of the time).
|
||||||
|
|
||||||
|
|
||||||
|
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
|
||||||
|
|
||||||
|
There are two ways of building PCRE using the "configure, make, make install"
|
||||||
|
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||||
|
the same thing; they are completely different from each other. There is also
|
||||||
|
support for building using CMake, which some users find a more straightforward
|
||||||
|
way of building PCRE under Windows.
|
||||||
|
|
||||||
|
The MinGW home page (http://www.mingw.org/) says this:
|
||||||
|
|
||||||
|
MinGW: A collection of freely available and freely distributable Windows
|
||||||
|
specific header files and import libraries combined with GNU toolsets that
|
||||||
|
allow one to produce native Windows programs that do not rely on any
|
||||||
|
3rd-party C runtime DLLs.
|
||||||
|
|
||||||
|
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||||
|
|
||||||
|
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||||
|
|
||||||
|
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||||
|
substantial Linux API functionality
|
||||||
|
|
||||||
|
. A collection of tools which provide Linux look and feel.
|
||||||
|
|
||||||
|
The Cygwin DLL currently works with all recent, commercially released x86 32
|
||||||
|
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
||||||
|
|
||||||
|
On both MinGW and Cygwin, PCRE should build correctly using:
|
||||||
|
|
||||||
|
./configure && make && make install
|
||||||
|
|
||||||
|
This should create two libraries called libpcre and libpcreposix, and, if you
|
||||||
|
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
||||||
|
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
||||||
|
also link with libpcre, which contains the basic functions. (Some earlier
|
||||||
|
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
||||||
|
longer happens.)
|
||||||
|
|
||||||
|
A user submitted a special-purpose patch that makes it easy to create
|
||||||
|
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
|
||||||
|
as a special target. If you use this target, no other files are built, and in
|
||||||
|
particular, the pcretest and pcregrep programs are not built. An example of how
|
||||||
|
this might be used is:
|
||||||
|
|
||||||
|
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
|
||||||
|
|
||||||
|
Using Cygwin's compiler generates libraries and executables that depend on
|
||||||
|
cygwin1.dll. If a library that is generated this way is distributed,
|
||||||
|
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||||
|
licence, this forces not only PCRE to be under the GPL, but also the entire
|
||||||
|
application. A distributor who wants to keep their own code proprietary must
|
||||||
|
purchase an appropriate Cygwin licence.
|
||||||
|
|
||||||
|
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||||
|
executable that can run standalone on Windows without any third party dll or
|
||||||
|
licensing issues.
|
||||||
|
|
||||||
|
But there is more complication:
|
||||||
|
|
||||||
|
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||||
|
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||||
|
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||||
|
gcc and MinGW's gcc). So, a user can:
|
||||||
|
|
||||||
|
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||||
|
-mno-cygwin.
|
||||||
|
|
||||||
|
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||||
|
compiler flags.
|
||||||
|
|
||||||
|
The test files that are supplied with PCRE are in UNIX format, with LF
|
||||||
|
characters as line terminators. Unless your PCRE library uses a default newline
|
||||||
|
option that includes LF as a valid newline, it may be necessary to change the
|
||||||
|
line terminators in the test files to get some of the tests to work.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON WINDOWS WITH CMAKE
|
||||||
|
|
||||||
|
CMake is an alternative configuration facility that can be used instead of
|
||||||
|
"configure". CMake creates project files (make files, solution files, etc.)
|
||||||
|
tailored to numerous development environments, including Visual Studio,
|
||||||
|
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||||
|
spaces in the names for your CMake installation and your PCRE source and build
|
||||||
|
directories.
|
||||||
|
|
||||||
|
The following instructions were contributed by a PCRE user. If they are not
|
||||||
|
followed exactly, errors may occur. In the event that errors do occur, it is
|
||||||
|
recommended that you delete the CMake cache before attempting to repeat the
|
||||||
|
CMake build process. In the CMake GUI, the cache can be deleted by selecting
|
||||||
|
"File > Delete Cache".
|
||||||
|
|
||||||
|
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||||
|
ensure that cmake\bin is on your path.
|
||||||
|
|
||||||
|
2. Unzip (retaining folder structure) the PCRE source tree into a source
|
||||||
|
directory such as C:\pcre. You should ensure your local date and time
|
||||||
|
is not earlier than the file dates in your source dir if the release is
|
||||||
|
very new.
|
||||||
|
|
||||||
|
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||||
|
source dir. For example, C:\pcre\pcre-xx\build.
|
||||||
|
|
||||||
|
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||||
|
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||||
|
to start CMake from the Windows Start menu, as this can lead to errors.
|
||||||
|
|
||||||
|
5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
|
||||||
|
directories, respectively.
|
||||||
|
|
||||||
|
6. Hit the "Configure" button.
|
||||||
|
|
||||||
|
7. Select the particular IDE / build tool that you are using (Visual
|
||||||
|
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||||
|
|
||||||
|
8. The GUI will then list several configuration options. This is where
|
||||||
|
you can enable UTF-8 support or other PCRE optional features.
|
||||||
|
|
||||||
|
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||||
|
active.
|
||||||
|
|
||||||
|
10. Hit "Generate".
|
||||||
|
|
||||||
|
11. The build directory should now contain a usable build system, be it a
|
||||||
|
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||||
|
cmake-gui and use the generated build system with your compiler or IDE.
|
||||||
|
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
|
||||||
|
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||||
|
build the ALL_BUILD project.
|
||||||
|
|
||||||
|
12. If during configuration with cmake-gui you've elected to build the test
|
||||||
|
programs, you can execute them by building the test project. E.g., for
|
||||||
|
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||||
|
most recent build configuration is targeted by the tests. A summary of
|
||||||
|
test results is presented. Complete test output is subsequently
|
||||||
|
available for review in Testing\Temporary under your build dir.
|
||||||
|
|
||||||
|
|
||||||
|
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||||
|
|
||||||
|
A PCRE user comments as follows: I thought that others may want to know the
|
||||||
|
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
|
||||||
|
|
||||||
|
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||||
|
first path - see below)
|
||||||
|
-- Only some of the contained file paths are modified - shown below for
|
||||||
|
pcre.vcproj
|
||||||
|
-- It properly modifies
|
||||||
|
|
||||||
|
I am sure CMake people can fix that if they want to. Until then one will
|
||||||
|
need to replace existing absolute paths in project files with relative
|
||||||
|
paths manually (e.g. from VS) - relative to project file location. I did
|
||||||
|
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
|
||||||
|
deal.
|
||||||
|
|
||||||
|
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||||
|
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||||
|
|
||||||
|
RelativePath="pcre.h"
|
||||||
|
RelativePath="pcre_chartables.c"
|
||||||
|
RelativePath="pcre_chartables.c.rule"
|
||||||
|
|
||||||
|
|
||||||
|
TESTING WITH RUNTEST.BAT
|
||||||
|
|
||||||
|
If configured with CMake, building the test project ("make test" or building
|
||||||
|
ALL_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
|
||||||
|
on your configuration options, possibly other test programs) in the build
|
||||||
|
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
|
||||||
|
|
||||||
|
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||||
|
of the source directory: Open command shell window. Chdir to the location
|
||||||
|
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
|
||||||
|
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||||
|
|
||||||
|
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||||
|
|
||||||
|
Otherwise:
|
||||||
|
|
||||||
|
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
|
||||||
|
have been created.
|
||||||
|
|
||||||
|
2. Edit RunTest.bat to identify the full or relative location of
|
||||||
|
the pcre source (in which the testdata folder resides), e.g.:
|
||||||
|
|
||||||
|
set srcdir=C:\pcre\pcre-8.20
|
||||||
|
|
||||||
|
3. In a Windows command environment, chdir to the location of your bat and
|
||||||
|
exe programs.
|
||||||
|
|
||||||
|
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||||
|
results, and discrepancies will be identified in the console output.
|
||||||
|
|
||||||
|
To independently test the just-in-time compiler, run pcre_jit_test.exe.
|
||||||
|
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
||||||
|
pcre_scanner_unittest.exe.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||||
|
|
||||||
|
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||||
|
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
||||||
|
site.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||||
|
|
||||||
|
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||||
|
|
||||||
|
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
|
||||||
|
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
|
||||||
|
mismatch. I'm including an easy workaround below, if you'd like to include it
|
||||||
|
in the non-unix instructions:
|
||||||
|
|
||||||
|
When linking a project with BCC5.5, pcre.lib must be included before any of the
|
||||||
|
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
|
||||||
|
|
||||||
|
A PCRE user sent these comments about this environment (see also the comment
|
||||||
|
from another user that follows them):
|
||||||
|
|
||||||
|
The XE versions of C++ Builder come with a RegularExpressionsCore class which
|
||||||
|
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
|
||||||
|
be desirable.
|
||||||
|
|
||||||
|
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
|
||||||
|
that is not usable with any version of C++ Builder because the compiler ships
|
||||||
|
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
|
||||||
|
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
|
||||||
|
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
|
||||||
|
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
|
||||||
|
embedded version of PCRE does not have the 16 bit function names, there is no
|
||||||
|
conflict.
|
||||||
|
|
||||||
|
Building PCRE using a C++ Builder static library project file (recommended):
|
||||||
|
|
||||||
|
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
|
||||||
|
original include path.
|
||||||
|
|
||||||
|
2. Download PCRE from pcre.org and extract to a directory.
|
||||||
|
|
||||||
|
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
|
||||||
|
pcre.h, and config.h.generic to config.h.
|
||||||
|
|
||||||
|
4. Edit pcre.h and pcre_config.c so that they include config.h.
|
||||||
|
|
||||||
|
5. Edit config.h like so:
|
||||||
|
|
||||||
|
Comment out the following lines:
|
||||||
|
#define PACKAGE "pcre"
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
#define PACKAGE_STRING "PCRE 8.32"
|
||||||
|
#define PACKAGE_TARNAME "pcre"
|
||||||
|
#define PACKAGE_URL ""
|
||||||
|
#define PACKAGE_VERSION "8.32"
|
||||||
|
|
||||||
|
Add the following lines:
|
||||||
|
#ifndef SUPPORT_UTF
|
||||||
|
#define SUPPORT_UTF 100 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UCP
|
||||||
|
#define SUPPORT_UCP 101 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_PCRE16
|
||||||
|
#define SUPPORT_PCRE16 102 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UTF8
|
||||||
|
#define SUPPORT_UTF8 103 // any value is fine
|
||||||
|
#endif
|
||||||
|
|
||||||
|
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
|
||||||
|
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
|
||||||
|
paths by going to Project / Options. Set the Include path. Do this from the
|
||||||
|
"Base" option to apply to both Release and Debug builds. Now add the following
|
||||||
|
files to the project:
|
||||||
|
|
||||||
|
pcre.h
|
||||||
|
pcre16_byte_order.c
|
||||||
|
pcre16_chartables.c
|
||||||
|
pcre16_compile.c
|
||||||
|
pcre16_config.c
|
||||||
|
pcre16_dfa_exec.c
|
||||||
|
pcre16_exec.c
|
||||||
|
pcre16_fullinfo.c
|
||||||
|
pcre16_get.c
|
||||||
|
pcre16_globals.c
|
||||||
|
pcre16_maketables.c
|
||||||
|
pcre16_newline.c
|
||||||
|
pcre16_ord2utf16.c
|
||||||
|
pcre16_printint.c
|
||||||
|
pcre16_refcount.c
|
||||||
|
pcre16_string_utils.c
|
||||||
|
pcre16_study.c
|
||||||
|
pcre16_tables.c
|
||||||
|
pcre16_ucd.c
|
||||||
|
pcre16_utf16_utils.c
|
||||||
|
pcre16_valid_utf16.c
|
||||||
|
pcre16_version.c
|
||||||
|
pcre16_xclass.c
|
||||||
|
|
||||||
|
//Optional
|
||||||
|
pcre_version.c
|
||||||
|
|
||||||
|
7. After compiling the .lib file, copy the .lib and header files to a project
|
||||||
|
you want to use PCRE with. Enjoy.
|
||||||
|
|
||||||
|
Optional ... Building PCRE using the makevp.bat file:
|
||||||
|
|
||||||
|
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
|
||||||
|
versions.
|
||||||
|
|
||||||
|
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
|
||||||
|
|
||||||
|
Another PCRE user added this comment:
|
||||||
|
|
||||||
|
Another approach I successfully used for some years with BCB 5 and 6 was to
|
||||||
|
make sure that include and library paths of PCRE are configured before the
|
||||||
|
default paths of the IDE in the dialogs where one can manage those paths.
|
||||||
|
Afterwards one can open the project files using a text editor and manually add
|
||||||
|
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
|
||||||
|
the library nodes where the IDE manages its own libraries to link against in
|
||||||
|
front of the IDE-own libraries. This way one can use the default PCRE function
|
||||||
|
names without getting access violations on runtime.
|
||||||
|
|
||||||
|
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON OPENVMS
|
||||||
|
|
||||||
|
Stephen Hoffman sent the following, in December 2012:
|
||||||
|
|
||||||
|
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
|
||||||
|
OpenVMS port and here
|
||||||
|
|
||||||
|
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
|
||||||
|
|
||||||
|
is a zip with the OpenVMS files, and with one modified testing-related PCRE
|
||||||
|
file." This is a port of PCRE 8.32.
|
||||||
|
|
||||||
|
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
|
||||||
|
They relate to an older version of PCRE that used fewer source files, so the
|
||||||
|
exact commands will need changing. See the current list of source files above.
|
||||||
|
|
||||||
|
"It was quite easy to compile and link the library. I don't have a formal
|
||||||
|
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||||
|
commands I used to build the library. I had to add #define
|
||||||
|
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
|
||||||
|
|
||||||
|
The library was built on:
|
||||||
|
O/S: HP OpenVMS v7.3-1
|
||||||
|
Compiler: Compaq C v6.5-001-48BCD
|
||||||
|
Linker: vA13-01
|
||||||
|
|
||||||
|
The test results did not match 100% due to the issues you mention in your
|
||||||
|
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
|
||||||
|
modified some of the character tables temporarily and was able to get the
|
||||||
|
results to match. Tests using the fr locale did not match since I don't have
|
||||||
|
that locale loaded. The study size was always reported to be 3 less than the
|
||||||
|
value in the standard test output files."
|
||||||
|
|
||||||
|
=========================
|
||||||
|
$! This DCL procedure builds PCRE on OpenVMS
|
||||||
|
$!
|
||||||
|
$! I followed the instructions in the non-unix-use file in the distribution.
|
||||||
|
$!
|
||||||
|
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES"
|
||||||
|
$ COMPILE DFTABLES.C
|
||||||
|
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
|
||||||
|
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
|
||||||
|
$ COMPILE MAKETABLES.C
|
||||||
|
$ COMPILE GET.C
|
||||||
|
$ COMPILE STUDY.C
|
||||||
|
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||||
|
$! did not seem to be defined anywhere.
|
||||||
|
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
|
||||||
|
$ COMPILE PCRE.C
|
||||||
|
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
|
||||||
|
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||||
|
$! did not seem to be defined anywhere.
|
||||||
|
$ COMPILE PCREPOSIX.C
|
||||||
|
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
|
||||||
|
$ COMPILE PCRETEST.C
|
||||||
|
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
|
||||||
|
$! C programs that want access to command line arguments must be
|
||||||
|
$! defined as a symbol
|
||||||
|
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
|
||||||
|
$! Arguments must be enclosed in quotes.
|
||||||
|
$ PCRETEST "-C"
|
||||||
|
$! Test results:
|
||||||
|
$!
|
||||||
|
$! The test results did not match 100%. The functions isprint(), iscntrl(),
|
||||||
|
$! isgraph() and ispunct() on OpenVMS must not produce the same results
|
||||||
|
$! as the system that built the test output files provided with the
|
||||||
|
$! distribution.
|
||||||
|
$!
|
||||||
|
$! The study size did not match and was always 3 less on OpenVMS.
|
||||||
|
$!
|
||||||
|
$! Locale could not be set to fr
|
||||||
|
$!
|
||||||
|
=========================
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON STRATUS OPENVOS
|
||||||
|
|
||||||
|
These notes on the port of PCRE to VOS (lightly edited) were supplied by
|
||||||
|
Ashutosh Warikoo, whose email address has the local part awarikoo and the
|
||||||
|
domain nse.co.in. The port was for version 7.9 in August 2009.
|
||||||
|
|
||||||
|
1. Building PCRE
|
||||||
|
|
||||||
|
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
|
||||||
|
problems. I used the following packages to build PCRE:
|
||||||
|
|
||||||
|
ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
|
||||||
|
|
||||||
|
Please read and follow the instructions that come with these packages. To start
|
||||||
|
the build of pcre, from the root of the package type:
|
||||||
|
|
||||||
|
./build.sh
|
||||||
|
|
||||||
|
2. Installing PCRE
|
||||||
|
|
||||||
|
Once you have successfully built PCRE, login to the SysAdmin group, switch to
|
||||||
|
the root user, and type
|
||||||
|
|
||||||
|
[ !create_dir (master_disk)>usr --if needed ]
|
||||||
|
[ !create_dir (master_disk)>usr>local --if needed ]
|
||||||
|
!gmake install
|
||||||
|
|
||||||
|
This installs PCRE and its man pages into /usr/local. You can add
|
||||||
|
(master_disk)>usr>local>bin to your command search paths, or if you are in
|
||||||
|
BASH, add /usr/local/bin to the PATH environment variable.
|
||||||
|
|
||||||
|
4. Restrictions
|
||||||
|
|
||||||
|
This port can optionally use the readline library. However, during the build I
|
||||||
|
faced some yet unexplored errors while linking with readline. As it was an
|
||||||
|
optional component I chose to disable it.
|
||||||
|
|
||||||
|
5. Known Problems
|
||||||
|
|
||||||
|
I ran the test suite, but you will have to be your own judge of whether this
|
||||||
|
command, and this port, suits your purposes. If you find any problems that
|
||||||
|
appear to be related to the port itself, please let me know. Please see the
|
||||||
|
build.log file in the root of the package also.
|
||||||
|
|
||||||
|
|
||||||
|
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
|
||||||
|
|
||||||
|
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||||
|
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||||
|
applications can be supported through UNIX System Services, and in such an
|
||||||
|
environment PCRE can be built in the same way as in other systems. However, in
|
||||||
|
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||||
|
required. For details, please see this web site:
|
||||||
|
|
||||||
|
http://www.zaconsultants.net
|
||||||
|
|
||||||
|
There is also a mirror here:
|
||||||
|
|
||||||
|
http://www.vsoft-software.com/downloads.html
|
||||||
|
|
||||||
|
==========================
|
||||||
|
Last Updated: 14 May 2013
|
991
tools/pcre/doc/html/README.txt
Normal file
991
tools/pcre/doc/html/README.txt
Normal file
@ -0,0 +1,991 @@
|
|||||||
|
README file for PCRE (Perl-compatible regular expression library)
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
|
The latest release of PCRE is always available in three alternative formats
|
||||||
|
from:
|
||||||
|
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||||
|
|
||||||
|
There is a mailing list for discussion about the development of PCRE at
|
||||||
|
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||||
|
subscription here:
|
||||||
|
|
||||||
|
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||||
|
|
||||||
|
Please read the NEWS file if you are upgrading from a previous release.
|
||||||
|
The contents of this README file are:
|
||||||
|
|
||||||
|
The PCRE APIs
|
||||||
|
Documentation for PCRE
|
||||||
|
Contributions by users of PCRE
|
||||||
|
Building PCRE on non-Unix-like systems
|
||||||
|
Building PCRE without using autotools
|
||||||
|
Building PCRE using autotools
|
||||||
|
Retrieving configuration information
|
||||||
|
Shared libraries
|
||||||
|
Cross-compiling using autotools
|
||||||
|
Using HP's ANSI C++ compiler (aCC)
|
||||||
|
Compiling in Tru64 using native compilers
|
||||||
|
Using Sun's compilers for Solaris
|
||||||
|
Using PCRE from MySQL
|
||||||
|
Making new tarballs
|
||||||
|
Testing PCRE
|
||||||
|
Character tables
|
||||||
|
File manifest
|
||||||
|
|
||||||
|
|
||||||
|
The PCRE APIs
|
||||||
|
-------------
|
||||||
|
|
||||||
|
PCRE is written in C, and it has its own API. There are three sets of
|
||||||
|
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||||
|
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||||
|
32-bit library, which processes strings of 32-bit values. The distribution also
|
||||||
|
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
||||||
|
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
||||||
|
C++.
|
||||||
|
|
||||||
|
In addition, there is a set of C wrapper functions (again, just for the 8-bit
|
||||||
|
library) that are based on the POSIX regular expression API (see the pcreposix
|
||||||
|
man page). These end up in the library called libpcreposix. Note that this just
|
||||||
|
provides a POSIX calling interface to PCRE; the regular expressions themselves
|
||||||
|
still follow Perl syntax and semantics. The POSIX API is restricted, and does
|
||||||
|
not give full access to all of PCRE's facilities.
|
||||||
|
|
||||||
|
The header file for the POSIX-style functions is called pcreposix.h. The
|
||||||
|
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||||
|
with existing files of that name by distributing it that way. To use PCRE with
|
||||||
|
an existing program that uses the POSIX API, pcreposix.h will have to be
|
||||||
|
renamed or pointed at by a link.
|
||||||
|
|
||||||
|
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
||||||
|
library installed on your system, as well as worrying about the regex.h header
|
||||||
|
file (as mentioned above), you must also take care when linking programs to
|
||||||
|
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
||||||
|
up the POSIX functions of the same name from the other library.
|
||||||
|
|
||||||
|
One way of avoiding this confusion is to compile PCRE with the addition of
|
||||||
|
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
|
||||||
|
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||||
|
effect of renaming the functions so that the names no longer clash. Of course,
|
||||||
|
you have to do the same thing for your applications, or write them using the
|
||||||
|
new names.
|
||||||
|
|
||||||
|
|
||||||
|
Documentation for PCRE
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
If you install PCRE in the normal way on a Unix-like system, you will end up
|
||||||
|
with a set of man pages whose names all start with "pcre". The one that is just
|
||||||
|
called "pcre" lists all the others. In addition to these man pages, the PCRE
|
||||||
|
documentation is supplied in two other forms:
|
||||||
|
|
||||||
|
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||||
|
doc/pcretest.txt in the source distribution. The first of these is a
|
||||||
|
concatenation of the text forms of all the section 3 man pages except
|
||||||
|
the listing of pcredemo.c and those that summarize individual functions.
|
||||||
|
The other two are the text forms of the section 1 man pages for the
|
||||||
|
pcregrep and pcretest commands. These text forms are provided for ease of
|
||||||
|
scanning with text editors or similar tools. They are installed in
|
||||||
|
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
|
||||||
|
(defaulting to /usr/local).
|
||||||
|
|
||||||
|
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||||
|
in various ways, and rooted in a file called index.html, is distributed in
|
||||||
|
doc/html and installed in <prefix>/share/doc/pcre/html.
|
||||||
|
|
||||||
|
Users of PCRE have contributed files containing the documentation for various
|
||||||
|
releases in CHM format. These can be found in the Contrib directory of the FTP
|
||||||
|
site (see next section).
|
||||||
|
|
||||||
|
|
||||||
|
Contributions by users of PCRE
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
You can find contributions from PCRE users in the directory
|
||||||
|
|
||||||
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||||
|
|
||||||
|
There is a README file giving brief descriptions of what they are. Some are
|
||||||
|
complete in themselves; others are pointers to URLs containing relevant files.
|
||||||
|
Some of this material is likely to be well out-of-date. Several of the earlier
|
||||||
|
contributions provided support for compiling PCRE on various flavours of
|
||||||
|
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||||
|
in the standard distribution, so these contributions have been archived.
|
||||||
|
|
||||||
|
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||||
|
pcretest programs here:
|
||||||
|
|
||||||
|
http://www.rexegg.com/pcregrep-pcretest.html
|
||||||
|
|
||||||
|
|
||||||
|
Building PCRE on non-Unix-like systems
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
For a non-Unix-like system, please read the comments in the file
|
||||||
|
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||||
|
"make" you may be able to build PCRE using autotools in the same way as for
|
||||||
|
many Unix-like systems.
|
||||||
|
|
||||||
|
PCRE can also be configured using the GUI facility provided by CMake's
|
||||||
|
cmake-gui command. This creates Makefiles, solution files, etc. The file
|
||||||
|
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||||
|
|
||||||
|
PCRE has been compiled on many different operating systems. It should be
|
||||||
|
straightforward to build PCRE on any system that has a Standard C compiler and
|
||||||
|
library, because it uses only Standard C functions.
|
||||||
|
|
||||||
|
|
||||||
|
Building PCRE without using autotools
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
The use of autotools (in particular, libtool) is problematic in some
|
||||||
|
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||||
|
file for ways of building PCRE without using autotools.
|
||||||
|
|
||||||
|
|
||||||
|
Building PCRE using autotools
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||||
|
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||||
|
|
||||||
|
The following instructions assume the use of the widely used "configure; make;
|
||||||
|
make install" (autotools) process.
|
||||||
|
|
||||||
|
To build PCRE on a system that supports autotools, first run the "configure"
|
||||||
|
command from the PCRE distribution directory, with your current directory set
|
||||||
|
to the directory where you want the files to be created. This command is a
|
||||||
|
standard GNU "autoconf" configuration script, for which generic instructions
|
||||||
|
are supplied in the file INSTALL.
|
||||||
|
|
||||||
|
Most commonly, people build PCRE within its own distribution directory, and in
|
||||||
|
this case, on many systems, just running "./configure" is sufficient. However,
|
||||||
|
the usual methods of changing standard defaults are available. For example:
|
||||||
|
|
||||||
|
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||||
|
|
||||||
|
This command specifies that the C compiler should be run with the flags '-O2
|
||||||
|
-Wall' instead of the default, and that "make install" should install PCRE
|
||||||
|
under /opt/local instead of the default /usr/local.
|
||||||
|
|
||||||
|
If you want to build in a different directory, just run "configure" with that
|
||||||
|
directory as current. For example, suppose you have unpacked the PCRE source
|
||||||
|
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
||||||
|
|
||||||
|
cd /build/pcre/pcre-xxx
|
||||||
|
/source/pcre/pcre-xxx/configure
|
||||||
|
|
||||||
|
PCRE is written in C and is normally compiled as a C library. However, it is
|
||||||
|
possible to build it as a C++ library, though the provided building apparatus
|
||||||
|
does not have any features to support this.
|
||||||
|
|
||||||
|
There are some optional features that can be included or omitted from the PCRE
|
||||||
|
library. They are also documented in the pcrebuild man page.
|
||||||
|
|
||||||
|
. By default, both shared and static libraries are built. You can change this
|
||||||
|
by adding one of these options to the "configure" command:
|
||||||
|
|
||||||
|
--disable-shared
|
||||||
|
--disable-static
|
||||||
|
|
||||||
|
(See also "Shared libraries" below.)
|
||||||
|
|
||||||
|
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
|
||||||
|
the "configure" command, the 16-bit library is also built. If you add
|
||||||
|
--enable-pcre32 to the "configure" command, the 32-bit library is also built.
|
||||||
|
If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
|
||||||
|
building the 8-bit library.
|
||||||
|
|
||||||
|
. If you are building the 8-bit library and want to suppress the building of
|
||||||
|
the C++ wrapper library, you can add --disable-cpp to the "configure"
|
||||||
|
command. Otherwise, when "configure" is run without --disable-pcre8, it will
|
||||||
|
try to find a C++ compiler and C++ header files, and if it succeeds, it will
|
||||||
|
try to build the C++ wrapper.
|
||||||
|
|
||||||
|
. If you want to include support for just-in-time compiling, which can give
|
||||||
|
large performance improvements on certain platforms, add --enable-jit to the
|
||||||
|
"configure" command. This support is available only for certain hardware
|
||||||
|
architectures. If you try to enable it on an unsupported architecture, there
|
||||||
|
will be a compile time error.
|
||||||
|
|
||||||
|
. When JIT support is enabled, pcregrep automatically makes use of it, unless
|
||||||
|
you add --disable-pcregrep-jit to the "configure" command.
|
||||||
|
|
||||||
|
. If you want to make use of the support for UTF-8 Unicode character strings in
|
||||||
|
the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
|
||||||
|
or UTF-32 Unicode character strings in the 32-bit library, you must add
|
||||||
|
--enable-utf to the "configure" command. Without it, the code for handling
|
||||||
|
UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
|
||||||
|
when --enable-utf is included, the use of a UTF encoding still has to be
|
||||||
|
enabled by an option at run time. When PCRE is compiled with this option, its
|
||||||
|
input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
|
||||||
|
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
|
||||||
|
the same time.
|
||||||
|
|
||||||
|
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
|
||||||
|
independently because that would allow ridiculous settings such as requesting
|
||||||
|
UTF-16 support while building only the 8-bit library. However, the option
|
||||||
|
--enable-utf8 is retained for backwards compatibility with earlier releases
|
||||||
|
that did not support 16-bit or 32-bit character strings. It is synonymous with
|
||||||
|
--enable-utf. It is not possible to configure one library with UTF support
|
||||||
|
and the other without in the same configuration.
|
||||||
|
|
||||||
|
. If, in addition to support for UTF-8/16/32 character strings, you want to
|
||||||
|
include support for the \P, \p, and \X sequences that recognize Unicode
|
||||||
|
character properties, you must add --enable-unicode-properties to the
|
||||||
|
"configure" command. This adds about 30K to the size of the library (in the
|
||||||
|
form of a property table); only the basic two-letter properties such as Lu
|
||||||
|
are supported.
|
||||||
|
|
||||||
|
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
||||||
|
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||||
|
end of a line. Whatever you specify at build time is the default; the caller
|
||||||
|
of PCRE can change the selection at run time. The default newline indicator
|
||||||
|
is a single LF character (the Unix standard). You can specify the default
|
||||||
|
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||||
|
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||||
|
--enable-newline-is-any to the "configure" command, respectively.
|
||||||
|
|
||||||
|
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||||
|
the standard tests will fail, because the lines in the test files end with
|
||||||
|
LF. Even if the files are edited to change the line endings, there are likely
|
||||||
|
to be some failures. With --enable-newline-is-anycrlf or
|
||||||
|
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||||
|
failures.
|
||||||
|
|
||||||
|
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||||
|
sequence. This is independent of the option specifying what PCRE considers to
|
||||||
|
be the end of a line (see above). However, the caller of PCRE can restrict \R
|
||||||
|
to match only CR, LF, or CRLF. You can make this the default by adding
|
||||||
|
--enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||||
|
|
||||||
|
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||||
|
storage for processing capturing parentheses if there are more than 10 of
|
||||||
|
them in a pattern. You can increase this threshold by setting, for example,
|
||||||
|
|
||||||
|
--with-posix-malloc-threshold=20
|
||||||
|
|
||||||
|
on the "configure" command.
|
||||||
|
|
||||||
|
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||||
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
|
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||||
|
when matching a pattern. If the limit is exceeded during a match, the match
|
||||||
|
fails. The default is ten million. You can change the default by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
|
--with-match-limit=500000
|
||||||
|
|
||||||
|
on the "configure" command. This is just the default; individual calls to
|
||||||
|
pcre_exec() can supply their own value. There is more discussion on the
|
||||||
|
pcreapi man page.
|
||||||
|
|
||||||
|
. There is a separate counter that limits the depth of recursive function calls
|
||||||
|
during a matching process. This also has a default of ten million, which is
|
||||||
|
essentially "unlimited". You can change the default by setting, for example,
|
||||||
|
|
||||||
|
--with-match-limit-recursion=500000
|
||||||
|
|
||||||
|
Recursive function calls use up the runtime stack; running out of stack can
|
||||||
|
cause programs to crash in strange ways. There is a discussion about stack
|
||||||
|
sizes in the pcrestack man page.
|
||||||
|
|
||||||
|
. The default maximum compiled pattern size is around 64K. You can increase
|
||||||
|
this by adding --with-link-size=3 to the "configure" command. In the 8-bit
|
||||||
|
library, PCRE then uses three bytes instead of two for offsets to different
|
||||||
|
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||||
|
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||||
|
offsets. Increasing the internal link size reduces performance. In the 32-bit
|
||||||
|
library, the only supported link size is 4.
|
||||||
|
|
||||||
|
. You can build PCRE so that its internal match() function that is called from
|
||||||
|
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
||||||
|
obtained from the heap via the special functions pcre_stack_malloc() and
|
||||||
|
pcre_stack_free() to save data that would otherwise be saved on the stack. To
|
||||||
|
build PCRE like this, use
|
||||||
|
|
||||||
|
--disable-stack-for-recursion
|
||||||
|
|
||||||
|
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||||
|
necessary in environments with limited stack sizes. This applies only to the
|
||||||
|
normal execution of the pcre_exec() function; if JIT support is being
|
||||||
|
successfully used, it is not relevant. Equally, it does not apply to
|
||||||
|
pcre_dfa_exec(), which does not use deeply nested recursion. There is a
|
||||||
|
discussion about stack sizes in the pcrestack man page.
|
||||||
|
|
||||||
|
. For speed, PCRE uses four tables for manipulating and identifying characters
|
||||||
|
whose code point values are less than 256. By default, it uses a set of
|
||||||
|
tables for ASCII encoding that is part of the distribution. If you specify
|
||||||
|
|
||||||
|
--enable-rebuild-chartables
|
||||||
|
|
||||||
|
a program called dftables is compiled and run in the default C locale when
|
||||||
|
you obey "make". It builds a source file called pcre_chartables.c. If you do
|
||||||
|
not specify this option, pcre_chartables.c is created as a copy of
|
||||||
|
pcre_chartables.c.dist. See "Character tables" below for further information.
|
||||||
|
|
||||||
|
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
||||||
|
character code (as opposed to ASCII/Unicode) by specifying
|
||||||
|
|
||||||
|
--enable-ebcdic
|
||||||
|
|
||||||
|
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||||
|
when PCRE is built this way, it always operates in EBCDIC. It cannot support
|
||||||
|
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||||
|
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||||
|
instead of the default 0x15.
|
||||||
|
|
||||||
|
. In environments where valgrind is installed, if you specify
|
||||||
|
|
||||||
|
--enable-valgrind
|
||||||
|
|
||||||
|
PCRE will use valgrind annotations to mark certain memory regions as
|
||||||
|
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||||
|
mostly useful for debugging PCRE itself.
|
||||||
|
|
||||||
|
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||||
|
is installed, if you specify
|
||||||
|
|
||||||
|
--enable-coverage
|
||||||
|
|
||||||
|
the build process implements a code coverage report for the test suite. The
|
||||||
|
report is generated by running "make coverage". If ccache is installed on
|
||||||
|
your system, it must be disabled when building PCRE for coverage reporting.
|
||||||
|
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||||
|
running "make" to build PCRE. There is more information about coverage
|
||||||
|
reporting in the "pcrebuild" documentation.
|
||||||
|
|
||||||
|
. The pcregrep program currently supports only 8-bit data files, and so
|
||||||
|
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||||
|
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||||
|
specifying one or both of
|
||||||
|
|
||||||
|
--enable-pcregrep-libz
|
||||||
|
--enable-pcregrep-libbz2
|
||||||
|
|
||||||
|
Of course, the relevant libraries must be installed on your system.
|
||||||
|
|
||||||
|
. The default size (in bytes) of the internal buffer used by pcregrep can be
|
||||||
|
set by, for example:
|
||||||
|
|
||||||
|
--with-pcregrep-bufsize=51200
|
||||||
|
|
||||||
|
The value must be a plain integer. The default is 20480.
|
||||||
|
|
||||||
|
. It is possible to compile pcretest so that it links with the libreadline
|
||||||
|
or libedit libraries, by specifying, respectively,
|
||||||
|
|
||||||
|
--enable-pcretest-libreadline or --enable-pcretest-libedit
|
||||||
|
|
||||||
|
If this is done, when pcretest's input is from a terminal, it reads it using
|
||||||
|
the readline() function. This provides line-editing and history facilities.
|
||||||
|
Note that libreadline is GPL-licensed, so if you distribute a binary of
|
||||||
|
pcretest linked in this way, there may be licensing issues. These can be
|
||||||
|
avoided by linking with libedit (which has a BSD licence) instead.
|
||||||
|
|
||||||
|
Enabling libreadline causes the -lreadline option to be added to the pcretest
|
||||||
|
build. In many operating environments with a system-installed readline
|
||||||
|
library this is sufficient. However, in some environments (e.g. if an
|
||||||
|
unmodified distribution version of readline is in use), it may be necessary
|
||||||
|
to specify something like LIBS="-lncurses" as well. This is because, to quote
|
||||||
|
the readline INSTALL, "Readline uses the termcap functions, but does not link
|
||||||
|
with the termcap or curses library itself, allowing applications which link
|
||||||
|
with readline the to choose an appropriate library." If you get error
|
||||||
|
messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
|
||||||
|
this is the problem, and linking with the ncurses library should fix it.
|
||||||
|
|
||||||
|
The "configure" script builds the following files for the basic C library:
|
||||||
|
|
||||||
|
. Makefile the makefile that builds the library
|
||||||
|
. config.h build-time configuration options for the library
|
||||||
|
. pcre.h the public PCRE header file
|
||||||
|
. pcre-config script that shows the building settings such as CFLAGS
|
||||||
|
that were set for "configure"
|
||||||
|
. libpcre.pc ) data for the pkg-config command
|
||||||
|
. libpcre16.pc )
|
||||||
|
. libpcre32.pc )
|
||||||
|
. libpcreposix.pc )
|
||||||
|
. libtool script that builds shared and/or static libraries
|
||||||
|
|
||||||
|
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||||
|
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||||
|
have to build PCRE without using "configure" or CMake. If you use "configure"
|
||||||
|
or CMake, the .generic versions are not used.
|
||||||
|
|
||||||
|
When building the 8-bit library, if a C++ compiler is found, the following
|
||||||
|
files are also built:
|
||||||
|
|
||||||
|
. libpcrecpp.pc data for the pkg-config command
|
||||||
|
. pcrecpparg.h header file for calling PCRE via the C++ wrapper
|
||||||
|
. pcre_stringpiece.h header for the C++ "stringpiece" functions
|
||||||
|
|
||||||
|
The "configure" script also creates config.status, which is an executable
|
||||||
|
script that can be run to recreate the configuration, and config.log, which
|
||||||
|
contains compiler output from tests that "configure" runs.
|
||||||
|
|
||||||
|
Once "configure" has run, you can run "make". This builds the libraries
|
||||||
|
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
|
||||||
|
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
|
||||||
|
built as well.
|
||||||
|
|
||||||
|
If the 8-bit library is built, libpcreposix and the pcregrep command are also
|
||||||
|
built, and if a C++ compiler was found on your system, and you did not disable
|
||||||
|
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
|
||||||
|
libpcrecpp, as well as some test programs called pcrecpp_unittest,
|
||||||
|
pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
||||||
|
|
||||||
|
The command "make check" runs all the appropriate tests. Details of the PCRE
|
||||||
|
tests are given below in a separate section of this document.
|
||||||
|
|
||||||
|
You can use "make install" to install PCRE into live directories on your
|
||||||
|
system. The following are installed (file names are all relative to the
|
||||||
|
<prefix> that is set when "configure" is run):
|
||||||
|
|
||||||
|
Commands (bin):
|
||||||
|
pcretest
|
||||||
|
pcregrep (if 8-bit support is enabled)
|
||||||
|
pcre-config
|
||||||
|
|
||||||
|
Libraries (lib):
|
||||||
|
libpcre16 (if 16-bit support is enabled)
|
||||||
|
libpcre32 (if 32-bit support is enabled)
|
||||||
|
libpcre (if 8-bit support is enabled)
|
||||||
|
libpcreposix (if 8-bit support is enabled)
|
||||||
|
libpcrecpp (if 8-bit and C++ support is enabled)
|
||||||
|
|
||||||
|
Configuration information (lib/pkgconfig):
|
||||||
|
libpcre16.pc
|
||||||
|
libpcre32.pc
|
||||||
|
libpcre.pc
|
||||||
|
libpcreposix.pc
|
||||||
|
libpcrecpp.pc (if C++ support is enabled)
|
||||||
|
|
||||||
|
Header files (include):
|
||||||
|
pcre.h
|
||||||
|
pcreposix.h
|
||||||
|
pcre_scanner.h )
|
||||||
|
pcre_stringpiece.h ) if C++ support is enabled
|
||||||
|
pcrecpp.h )
|
||||||
|
pcrecpparg.h )
|
||||||
|
|
||||||
|
Man pages (share/man/man{1,3}):
|
||||||
|
pcregrep.1
|
||||||
|
pcretest.1
|
||||||
|
pcre-config.1
|
||||||
|
pcre.3
|
||||||
|
pcre*.3 (lots more pages, all starting "pcre")
|
||||||
|
|
||||||
|
HTML documentation (share/doc/pcre/html):
|
||||||
|
index.html
|
||||||
|
*.html (lots more pages, hyperlinked from index.html)
|
||||||
|
|
||||||
|
Text file documentation (share/doc/pcre):
|
||||||
|
AUTHORS
|
||||||
|
COPYING
|
||||||
|
ChangeLog
|
||||||
|
LICENCE
|
||||||
|
NEWS
|
||||||
|
README
|
||||||
|
pcre.txt (a concatenation of the man(3) pages)
|
||||||
|
pcretest.txt the pcretest man page
|
||||||
|
pcregrep.txt the pcregrep man page
|
||||||
|
pcre-config.txt the pcre-config man page
|
||||||
|
|
||||||
|
If you want to remove PCRE from your system, you can run "make uninstall".
|
||||||
|
This removes all the files that "make install" installed. However, it does not
|
||||||
|
remove any directories, because these are often shared with other programs.
|
||||||
|
|
||||||
|
|
||||||
|
Retrieving configuration information
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
Running "make install" installs the command pcre-config, which can be used to
|
||||||
|
recall information about the PCRE configuration and installation. For example:
|
||||||
|
|
||||||
|
pcre-config --version
|
||||||
|
|
||||||
|
prints the version number, and
|
||||||
|
|
||||||
|
pcre-config --libs
|
||||||
|
|
||||||
|
outputs information about where the library is installed. This command can be
|
||||||
|
included in makefiles for programs that use PCRE, saving the programmer from
|
||||||
|
having to remember too many details.
|
||||||
|
|
||||||
|
The pkg-config command is another system for saving and retrieving information
|
||||||
|
about installed libraries. Instead of separate commands for each library, a
|
||||||
|
single command is used. For example:
|
||||||
|
|
||||||
|
pkg-config --cflags pcre
|
||||||
|
|
||||||
|
The data is held in *.pc files that are installed in a directory called
|
||||||
|
<prefix>/lib/pkgconfig.
|
||||||
|
|
||||||
|
|
||||||
|
Shared libraries
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The default distribution builds PCRE as shared libraries and static libraries,
|
||||||
|
as long as the operating system supports shared libraries. Shared library
|
||||||
|
support relies on the "libtool" script which is built as part of the
|
||||||
|
"configure" process.
|
||||||
|
|
||||||
|
The libtool script is used to compile and link both shared and static
|
||||||
|
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||||
|
built. The programs pcretest and pcregrep are built to use these uninstalled
|
||||||
|
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||||
|
you use "make install" to install shared libraries, pcregrep and pcretest are
|
||||||
|
automatically re-built to use the newly installed shared libraries before being
|
||||||
|
installed themselves. However, the versions left in the build directory still
|
||||||
|
use the uninstalled libraries.
|
||||||
|
|
||||||
|
To build PCRE using static libraries only you must use --disable-shared when
|
||||||
|
configuring it. For example:
|
||||||
|
|
||||||
|
./configure --prefix=/usr/gnu --disable-shared
|
||||||
|
|
||||||
|
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||||
|
build only shared libraries.
|
||||||
|
|
||||||
|
|
||||||
|
Cross-compiling using autotools
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||||
|
order to cross-compile PCRE for some other host. However, you should NOT
|
||||||
|
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||||
|
file is compiled and run on the local host, in order to generate the inbuilt
|
||||||
|
character tables (the pcre_chartables.c file). This will probably not work,
|
||||||
|
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||||
|
compiler.
|
||||||
|
|
||||||
|
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
|
||||||
|
by making a copy of pcre_chartables.c.dist, which is a default set of tables
|
||||||
|
that assumes ASCII code. Cross-compiling with the default tables should not be
|
||||||
|
a problem.
|
||||||
|
|
||||||
|
If you need to modify the character tables when cross-compiling, you should
|
||||||
|
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
|
||||||
|
run it on the local host to make a new version of pcre_chartables.c.dist.
|
||||||
|
Then when you cross-compile PCRE this new version of the tables will be used.
|
||||||
|
|
||||||
|
|
||||||
|
Using HP's ANSI C++ compiler (aCC)
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
|
||||||
|
"configure" script, you must include the "-AA" option in the CXXFLAGS
|
||||||
|
environment variable in order for the C++ components to compile correctly.
|
||||||
|
|
||||||
|
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
|
||||||
|
needed libraries fail to get included when specifying the "-AA" compiler
|
||||||
|
option. If you experience unresolved symbols when linking the C++ programs,
|
||||||
|
use the workaround of specifying the following environment variable prior to
|
||||||
|
running the "configure" script:
|
||||||
|
|
||||||
|
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||||
|
|
||||||
|
|
||||||
|
Compiling in Tru64 using native compilers
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
The following error may occur when compiling with native compilers in the Tru64
|
||||||
|
operating system:
|
||||||
|
|
||||||
|
CXX libpcrecpp_la-pcrecpp.lo
|
||||||
|
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
|
||||||
|
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
|
||||||
|
override default - see section 7.1.2 of the C++ Using Guide"
|
||||||
|
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
|
||||||
|
- see section 7.1.2 of the C++ Using Guide"
|
||||||
|
|
||||||
|
This may be followed by other errors, complaining that 'namespace "std" has no
|
||||||
|
member'. The solution to this is to add the line
|
||||||
|
|
||||||
|
#define __USE_STD_IOSTREAM 1
|
||||||
|
|
||||||
|
to the config.h file.
|
||||||
|
|
||||||
|
|
||||||
|
Using Sun's compilers for Solaris
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
A user reports that the following configurations work on Solaris 9 sparcv9 and
|
||||||
|
Solaris 9 x86 (32-bit):
|
||||||
|
|
||||||
|
Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
|
||||||
|
Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
|
||||||
|
|
||||||
|
|
||||||
|
Using PCRE from MySQL
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
On systems where both PCRE and MySQL are installed, it is possible to make use
|
||||||
|
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
|
||||||
|
There is a web page that tells you how to do this:
|
||||||
|
|
||||||
|
http://www.mysqludf.org/lib_mysqludf_preg/index.php
|
||||||
|
|
||||||
|
|
||||||
|
Making new tarballs
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
|
||||||
|
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||||
|
build of the new distribution to ensure that it works.
|
||||||
|
|
||||||
|
If you have modified any of the man page sources in the doc directory, you
|
||||||
|
should first run the PrepareRelease script before making a distribution. This
|
||||||
|
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||||
|
|
||||||
|
|
||||||
|
Testing PCRE
|
||||||
|
------------
|
||||||
|
|
||||||
|
To test the basic PCRE library on a Unix-like system, run the RunTest script.
|
||||||
|
There is another script called RunGrepTest that tests the options of the
|
||||||
|
pcregrep command. If the C++ wrapper library is built, three test programs
|
||||||
|
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
|
||||||
|
are also built. When JIT support is enabled, another test program called
|
||||||
|
pcre_jit_test is built.
|
||||||
|
|
||||||
|
Both the scripts and all the program tests are run if you obey "make check" or
|
||||||
|
"make test". For other environments, see the instructions in
|
||||||
|
NON-AUTOTOOLS-BUILD.
|
||||||
|
|
||||||
|
The RunTest script runs the pcretest test program (which is documented in its
|
||||||
|
own man page) on each of the relevant testinput files in the testdata
|
||||||
|
directory, and compares the output with the contents of the corresponding
|
||||||
|
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||||
|
from pcretest. Other files whose names begin with "test" are used as working
|
||||||
|
files in some tests.
|
||||||
|
|
||||||
|
Some tests are relevant only when certain build-time options were selected. For
|
||||||
|
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
|
||||||
|
used. RunTest outputs a comment when it skips a test.
|
||||||
|
|
||||||
|
Many of the tests that are not skipped are run up to three times. The second
|
||||||
|
run forces pcre_study() to be called for all patterns except for a few in some
|
||||||
|
tests that are marked "never study" (see the pcretest program for how this is
|
||||||
|
done). If JIT support is available, the non-DFA tests are run a third time,
|
||||||
|
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
||||||
|
This testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||||
|
|
||||||
|
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||||
|
libraries that are enabled. If you want to run just one set of tests, call
|
||||||
|
RunTest with either the -8, -16 or -32 option.
|
||||||
|
|
||||||
|
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||||
|
on the RunTest command line. To run pcretest on just one or more specific test
|
||||||
|
files, give their numbers as arguments to RunTest, for example:
|
||||||
|
|
||||||
|
RunTest 2 7 11
|
||||||
|
|
||||||
|
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||||
|
end), or a number preceded by ~ to exclude a test. For example:
|
||||||
|
|
||||||
|
RunTest 3-15 ~10
|
||||||
|
|
||||||
|
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||||
|
except test 13. Whatever order the arguments are in, the tests are always run
|
||||||
|
in numerical order.
|
||||||
|
|
||||||
|
You can also call RunTest with the single argument "list" to cause it to output
|
||||||
|
a list of tests.
|
||||||
|
|
||||||
|
The first test file can be fed directly into the perltest.pl script to check
|
||||||
|
that Perl gives the same results. The only difference you should see is in the
|
||||||
|
first few lines, where the Perl version is given instead of the PCRE version.
|
||||||
|
|
||||||
|
The second set of tests checks pcre_fullinfo(), pcre_study(),
|
||||||
|
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||||
|
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||||
|
wrapper API. It also uses the debugging flags to check some of the internals of
|
||||||
|
pcre_compile().
|
||||||
|
|
||||||
|
If you build PCRE with a locale setting that is not the standard C locale, the
|
||||||
|
character tables may be different (see next paragraph). In some cases, this may
|
||||||
|
cause failures in the second set of tests. For example, in a locale where the
|
||||||
|
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||||
|
[:isascii:] inside a character class defines a different set of characters, and
|
||||||
|
this shows up in this test as a difference in the compiled code, which is being
|
||||||
|
listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
||||||
|
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
||||||
|
bug in PCRE.
|
||||||
|
|
||||||
|
The third set of tests checks pcre_maketables(), the facility for building a
|
||||||
|
set of character tables for a specific locale and using them instead of the
|
||||||
|
default tables. The tests make use of the "fr_FR" (French) locale. Before
|
||||||
|
running the test, the script checks for the presence of this locale by running
|
||||||
|
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
|
||||||
|
in the list of available locales, the third test cannot be run, and a comment
|
||||||
|
is output to say why. If running this test produces instances of the error
|
||||||
|
|
||||||
|
** Failed to set locale "fr_FR"
|
||||||
|
|
||||||
|
in the comparison output, it means that locale is not available on your system,
|
||||||
|
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||||
|
|
||||||
|
[If you are trying to run this test on Windows, you may be able to get it to
|
||||||
|
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
|
||||||
|
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
|
||||||
|
Windows versions of test 2. More info on using RunTest.bat is included in the
|
||||||
|
document entitled NON-UNIX-USE.]
|
||||||
|
|
||||||
|
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
|
||||||
|
internal UTF features of PCRE that are not relevant to Perl, respectively. The
|
||||||
|
sixth and seventh tests do the same for Unicode character properties support.
|
||||||
|
|
||||||
|
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
|
||||||
|
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
|
||||||
|
mode with Unicode property support, respectively.
|
||||||
|
|
||||||
|
The eleventh test checks some internal offsets and code size features; it is
|
||||||
|
run only when the default "link size" of 2 is set (in other cases the sizes
|
||||||
|
change) and when Unicode property support is enabled.
|
||||||
|
|
||||||
|
The twelfth test is run only when JIT support is available, and the thirteenth
|
||||||
|
test is run only when JIT support is not available. They test some JIT-specific
|
||||||
|
features such as information output from pcretest about JIT compilation.
|
||||||
|
|
||||||
|
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
||||||
|
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
|
||||||
|
mode. These are tests that generate different output in the two modes. They are
|
||||||
|
for general cases, UTF-8/16/32 support, and Unicode property support,
|
||||||
|
respectively.
|
||||||
|
|
||||||
|
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
||||||
|
16/32-bit features of the DFA matching engine.
|
||||||
|
|
||||||
|
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
|
||||||
|
the link size is set to 2 for the 16-bit library. They test reloading
|
||||||
|
pre-compiled patterns.
|
||||||
|
|
||||||
|
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
|
||||||
|
for general cases, and UTF-16 support, respectively.
|
||||||
|
|
||||||
|
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
|
||||||
|
for general cases, and UTF-32 support, respectively.
|
||||||
|
|
||||||
|
|
||||||
|
Character tables
|
||||||
|
----------------
|
||||||
|
|
||||||
|
For speed, PCRE uses four tables for manipulating and identifying characters
|
||||||
|
whose code point values are less than 256. The final argument of the
|
||||||
|
pcre_compile() function is a pointer to a block of memory containing the
|
||||||
|
concatenated tables. A call to pcre_maketables() can be used to generate a set
|
||||||
|
of tables in the current locale. If the final argument for pcre_compile() is
|
||||||
|
passed as NULL, a set of default tables that is built into the binary is used.
|
||||||
|
|
||||||
|
The source file called pcre_chartables.c contains the default set of tables. By
|
||||||
|
default, this is created as a copy of pcre_chartables.c.dist, which contains
|
||||||
|
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
|
||||||
|
for ./configure, a different version of pcre_chartables.c is built by the
|
||||||
|
program dftables (compiled from dftables.c), which uses the ANSI C character
|
||||||
|
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
|
||||||
|
build the table sources. This means that the default C locale which is set for
|
||||||
|
your system will control the contents of these default tables. You can change
|
||||||
|
the default tables by editing pcre_chartables.c and then re-building PCRE. If
|
||||||
|
you do this, you should take care to ensure that the file does not get
|
||||||
|
automatically re-generated. The best way to do this is to move
|
||||||
|
pcre_chartables.c.dist out of the way and replace it with your customized
|
||||||
|
tables.
|
||||||
|
|
||||||
|
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||||
|
it uses the default C locale that is set on your system. It does not pay
|
||||||
|
attention to the LC_xxx environment variables. In other words, it uses the
|
||||||
|
system's default locale rather than whatever the compiling user happens to have
|
||||||
|
set. If you really do want to build a source set of character tables in a
|
||||||
|
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||||
|
program by hand with the -L option. For example:
|
||||||
|
|
||||||
|
./dftables -L pcre_chartables.c.special
|
||||||
|
|
||||||
|
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||||
|
respectively. The next table consists of three 32-byte bit maps which identify
|
||||||
|
digits, "word" characters, and white space, respectively. These are used when
|
||||||
|
building 32-byte bit maps that represent character classes for code points less
|
||||||
|
than 256.
|
||||||
|
|
||||||
|
The final 256-byte table has bits indicating various character types, as
|
||||||
|
follows:
|
||||||
|
|
||||||
|
1 white space character
|
||||||
|
2 letter
|
||||||
|
4 decimal digit
|
||||||
|
8 hexadecimal digit
|
||||||
|
16 alphanumeric or '_'
|
||||||
|
128 regular expression metacharacter or binary zero
|
||||||
|
|
||||||
|
You should not alter the set of characters that contain the 128 bit, as that
|
||||||
|
will cause PCRE to malfunction.
|
||||||
|
|
||||||
|
|
||||||
|
File manifest
|
||||||
|
-------------
|
||||||
|
|
||||||
|
The distribution should contain the files listed below. Where a file name is
|
||||||
|
given as pcre[16|32]_xxx it means that there are three files, one with the name
|
||||||
|
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
|
||||||
|
|
||||||
|
(A) Source files of the PCRE library functions and their headers:
|
||||||
|
|
||||||
|
dftables.c auxiliary program for building pcre_chartables.c
|
||||||
|
when --enable-rebuild-chartables is specified
|
||||||
|
|
||||||
|
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
||||||
|
coding; used, unless --enable-rebuild-chartables is
|
||||||
|
specified, by copying to pcre[16]_chartables.c
|
||||||
|
|
||||||
|
pcreposix.c )
|
||||||
|
pcre[16|32]_byte_order.c )
|
||||||
|
pcre[16|32]_compile.c )
|
||||||
|
pcre[16|32]_config.c )
|
||||||
|
pcre[16|32]_dfa_exec.c )
|
||||||
|
pcre[16|32]_exec.c )
|
||||||
|
pcre[16|32]_fullinfo.c )
|
||||||
|
pcre[16|32]_get.c ) sources for the functions in the library,
|
||||||
|
pcre[16|32]_globals.c ) and some internal functions that they use
|
||||||
|
pcre[16|32]_jit_compile.c )
|
||||||
|
pcre[16|32]_maketables.c )
|
||||||
|
pcre[16|32]_newline.c )
|
||||||
|
pcre[16|32]_refcount.c )
|
||||||
|
pcre[16|32]_string_utils.c )
|
||||||
|
pcre[16|32]_study.c )
|
||||||
|
pcre[16|32]_tables.c )
|
||||||
|
pcre[16|32]_ucd.c )
|
||||||
|
pcre[16|32]_version.c )
|
||||||
|
pcre[16|32]_xclass.c )
|
||||||
|
pcre_ord2utf8.c )
|
||||||
|
pcre_valid_utf8.c )
|
||||||
|
pcre16_ord2utf16.c )
|
||||||
|
pcre16_utf16_utils.c )
|
||||||
|
pcre16_valid_utf16.c )
|
||||||
|
pcre32_utf32_utils.c )
|
||||||
|
pcre32_valid_utf32.c )
|
||||||
|
|
||||||
|
pcre[16|32]_printint.c ) debugging function that is used by pcretest,
|
||||||
|
) and can also be #included in pcre_compile()
|
||||||
|
|
||||||
|
pcre.h.in template for pcre.h when built by "configure"
|
||||||
|
pcreposix.h header for the external POSIX wrapper API
|
||||||
|
pcre_internal.h header for internal use
|
||||||
|
sljit/* 16 files that make up the JIT compiler
|
||||||
|
ucp.h header for Unicode property handling
|
||||||
|
|
||||||
|
config.h.in template for config.h, which is built by "configure"
|
||||||
|
|
||||||
|
pcrecpp.h public header file for the C++ wrapper
|
||||||
|
pcrecpparg.h.in template for another C++ header file
|
||||||
|
pcre_scanner.h public header file for C++ scanner functions
|
||||||
|
pcrecpp.cc )
|
||||||
|
pcre_scanner.cc ) source for the C++ wrapper library
|
||||||
|
|
||||||
|
pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the
|
||||||
|
C++ stringpiece functions
|
||||||
|
pcre_stringpiece.cc source for the C++ stringpiece functions
|
||||||
|
|
||||||
|
(B) Source files for programs that use PCRE:
|
||||||
|
|
||||||
|
pcredemo.c simple demonstration of coding calls to PCRE
|
||||||
|
pcregrep.c source of a grep utility that uses PCRE
|
||||||
|
pcretest.c comprehensive test program
|
||||||
|
|
||||||
|
(C) Auxiliary files:
|
||||||
|
|
||||||
|
132html script to turn "man" pages into HTML
|
||||||
|
AUTHORS information about the author of PCRE
|
||||||
|
ChangeLog log of changes to the code
|
||||||
|
CleanTxt script to clean nroff output for txt man pages
|
||||||
|
Detrail script to remove trailing spaces
|
||||||
|
HACKING some notes about the internals of PCRE
|
||||||
|
INSTALL generic installation instructions
|
||||||
|
LICENCE conditions for the use of PCRE
|
||||||
|
COPYING the same, using GNU's standard name
|
||||||
|
Makefile.in ) template for Unix Makefile, which is built by
|
||||||
|
) "configure"
|
||||||
|
Makefile.am ) the automake input that was used to create
|
||||||
|
) Makefile.in
|
||||||
|
NEWS important changes in this release
|
||||||
|
NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD
|
||||||
|
NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools
|
||||||
|
PrepareRelease script to make preparations for "make dist"
|
||||||
|
README this file
|
||||||
|
RunTest a Unix shell script for running tests
|
||||||
|
RunGrepTest a Unix shell script for pcregrep tests
|
||||||
|
aclocal.m4 m4 macros (generated by "aclocal")
|
||||||
|
config.guess ) files used by libtool,
|
||||||
|
config.sub ) used only when building a shared library
|
||||||
|
configure a configuring shell script (built by autoconf)
|
||||||
|
configure.ac ) the autoconf input that was used to build
|
||||||
|
) "configure" and config.h
|
||||||
|
depcomp ) script to find program dependencies, generated by
|
||||||
|
) automake
|
||||||
|
doc/*.3 man page sources for PCRE
|
||||||
|
doc/*.1 man page sources for pcregrep and pcretest
|
||||||
|
doc/index.html.src the base HTML page
|
||||||
|
doc/html/* HTML documentation
|
||||||
|
doc/pcre.txt plain text version of the man pages
|
||||||
|
doc/pcretest.txt plain text documentation of test program
|
||||||
|
doc/perltest.txt plain text documentation of Perl test program
|
||||||
|
install-sh a shell script for installing files
|
||||||
|
libpcre16.pc.in template for libpcre16.pc for pkg-config
|
||||||
|
libpcre32.pc.in template for libpcre32.pc for pkg-config
|
||||||
|
libpcre.pc.in template for libpcre.pc for pkg-config
|
||||||
|
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
|
||||||
|
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
||||||
|
ltmain.sh file used to build a libtool script
|
||||||
|
missing ) common stub for a few missing GNU programs while
|
||||||
|
) installing, generated by automake
|
||||||
|
mkinstalldirs script for making install directories
|
||||||
|
perltest.pl Perl test program
|
||||||
|
pcre-config.in source of script which retains PCRE information
|
||||||
|
pcre_jit_test.c test program for the JIT compiler
|
||||||
|
pcrecpp_unittest.cc )
|
||||||
|
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
||||||
|
pcre_stringpiece_unittest.cc )
|
||||||
|
testdata/testinput* test data for main library tests
|
||||||
|
testdata/testoutput* expected test results
|
||||||
|
testdata/grep* input and output for pcregrep tests
|
||||||
|
testdata/* other supporting test files
|
||||||
|
|
||||||
|
(D) Auxiliary files for cmake support
|
||||||
|
|
||||||
|
cmake/COPYING-CMAKE-SCRIPTS
|
||||||
|
cmake/FindPackageHandleStandardArgs.cmake
|
||||||
|
cmake/FindEditline.cmake
|
||||||
|
cmake/FindReadline.cmake
|
||||||
|
CMakeLists.txt
|
||||||
|
config-cmake.h.in
|
||||||
|
|
||||||
|
(E) Auxiliary files for VPASCAL
|
||||||
|
|
||||||
|
makevp.bat
|
||||||
|
makevp_c.txt
|
||||||
|
makevp_l.txt
|
||||||
|
pcregexp.pas
|
||||||
|
|
||||||
|
(F) Auxiliary files for building PCRE "by hand"
|
||||||
|
|
||||||
|
pcre.h.generic ) a version of the public PCRE header file
|
||||||
|
) for use in non-"configure" environments
|
||||||
|
config.h.generic ) a version of config.h for use in non-"configure"
|
||||||
|
) environments
|
||||||
|
|
||||||
|
(G) Miscellaneous
|
||||||
|
|
||||||
|
RunTest.bat a script for running tests under Windows
|
||||||
|
|
||||||
|
Philip Hazel
|
||||||
|
Email local part: ph10
|
||||||
|
Email domain: cam.ac.uk
|
||||||
|
Last updated: 17 January 2014
|
@ -11,27 +11,29 @@
|
|||||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||||
<p>
|
<p>
|
||||||
The HTML documentation for PCRE comprises the following pages:
|
The HTML documentation for PCRE consists of a number of pages that are listed
|
||||||
|
below in alphabetical order. If you are new to PCRE, please read the first one
|
||||||
|
first.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><td><a href="pcre.html">pcre</a></td>
|
<tr><td><a href="pcre.html">pcre</a></td>
|
||||||
<td> Introductory page</td></tr>
|
<td> Introductory page</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||||
|
<td> Information about the installation configuration</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre16.html">pcre16</a></td>
|
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||||
<td> Discussion of the 16-bit PCRE library</td></tr>
|
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre32.html">pcre32</a></td>
|
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||||
<td> Discussion of the 32-bit PCRE library</td></tr>
|
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
|
||||||
<td> Information about the installation configuration</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||||
<td> PCRE's native API</td></tr>
|
<td> PCRE's native API</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||||
<td> Options for building PCRE</td></tr>
|
<td> Building PCRE</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||||
<td> The <i>callout</i> facility</td></tr>
|
<td> The <i>callout</i> facility</td></tr>
|
||||||
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<td> Some comments on performance</td></tr>
|
<td> Some comments on performance</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||||
<td> The POSIX API to the PCRE library</td></tr>
|
<td> The POSIX API to the PCRE 8-bit library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||||
<td> How to save and re-use compiled patterns</td></tr>
|
<td> How to save and re-use compiled patterns</td></tr>
|
||||||
@ -118,13 +120,13 @@ functions.
|
|||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
|
||||||
<td> Free study data</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(Perl compatible)</td></tr>
|
(Perl compatible)</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||||
|
<td> Free study data</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||||
<td> Free extracted substring</td></tr>
|
<td> Free extracted substring</td></tr>
|
||||||
|
|
||||||
@ -140,14 +142,17 @@ functions.
|
|||||||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||||
<td> Convert captured string name to number</td></tr>
|
<td> Convert captured string name to number</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
|
||||||
|
<td> Find table entries for given string name</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||||
<td> Extract numbered substring into new memory</td></tr>
|
<td> Extract numbered substring into new memory</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||||
<td> Extract all substrings into new memory</td></tr>
|
<td> Extract all substrings into new memory</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
|
||||||
<td> Obsolete information extraction function</td></tr>
|
<td> Fast path interface to JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||||
<td> Create a stack for JIT matching</td></tr>
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
@ -38,9 +38,9 @@ Herczeg.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Starting with release 8.32 it is possible to compile a third separate PCRE
|
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||||
library, which supports 32-bit character strings (including
|
library that supports 32-bit character strings (including UTF-32 strings). The
|
||||||
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
build process allows any combination of the 8-, 16- and 32-bit libraries. The
|
||||||
libraries. The work to make this possible was done by Christian Persch.
|
work to make this possible was done by Christian Persch.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The three libraries contain identical sets of functions, except that the names
|
The three libraries contain identical sets of functions, except that the names
|
||||||
@ -62,7 +62,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 6.2.0.
|
release 6.3.0.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
@ -100,8 +100,11 @@ function makes it possible for a client to discover which features are
|
|||||||
available. The features themselves are described in the
|
available. The features themselves are described in the
|
||||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||||
page. Documentation about building PCRE for various operating systems can be
|
page. Documentation about building PCRE for various operating systems can be
|
||||||
found in the <b>README</b> and <b>NON-AUTOTOOLS_BUILD</b> files in the source
|
found in the
|
||||||
distribution.
|
<a href="README.txt"><b>README</b></a>
|
||||||
|
and
|
||||||
|
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
|
||||||
|
files in the source distribution.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The libraries contain a number of undocumented internal functions and data
|
The libraries contain a number of undocumented internal functions and data
|
||||||
@ -126,8 +129,11 @@ use sufficiently many resources as to cause your application to lose
|
|||||||
performance.
|
performance.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The best way of guarding against this possibility is to use the
|
One way of guarding against this possibility is to use the
|
||||||
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
|
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
|
||||||
|
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
|
||||||
|
compile time. This causes a compile-time error if a pattern contains a
|
||||||
|
UTF-setting sequence.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If your application is one that supports UTF, be aware that validity checking
|
If your application is one that supports UTF, be aware that validity checking
|
||||||
@ -148,15 +154,18 @@ page.
|
|||||||
The user documentation for PCRE comprises a number of different sections. In
|
The user documentation for PCRE comprises a number of different sections. In
|
||||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||||
each is a separate page, linked from the index page. In the plain text format,
|
each is a separate page, linked from the index page. In the plain text format,
|
||||||
all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
|
the descriptions of the <b>pcregrep</b> and <b>pcretest</b> programs are in files
|
||||||
of searching. The sections are as follows:
|
called <b>pcregrep.txt</b> and <b>pcretest.txt</b>, respectively. The remaining
|
||||||
|
sections, except for the <b>pcredemo</b> section (which is a program listing),
|
||||||
|
are concatenated in <b>pcre.txt</b>, for ease of searching. The sections are as
|
||||||
|
follows:
|
||||||
<pre>
|
<pre>
|
||||||
pcre this document
|
pcre this document
|
||||||
|
pcre-config show PCRE installation configuration information
|
||||||
pcre16 details of the 16-bit library
|
pcre16 details of the 16-bit library
|
||||||
pcre32 details of the 32-bit library
|
pcre32 details of the 32-bit library
|
||||||
pcre-config show PCRE installation configuration information
|
|
||||||
pcreapi details of PCRE's native C API
|
pcreapi details of PCRE's native C API
|
||||||
pcrebuild options for building PCRE
|
pcrebuild building PCRE
|
||||||
pcrecallout details of the callout feature
|
pcrecallout details of the callout feature
|
||||||
pcrecompat discussion of Perl compatibility
|
pcrecompat discussion of Perl compatibility
|
||||||
pcrecpp details of the C++ wrapper for the 8-bit library
|
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||||
@ -176,8 +185,8 @@ of searching. The sections are as follows:
|
|||||||
pcretest description of the <b>pcretest</b> testing command
|
pcretest description of the <b>pcretest</b> testing command
|
||||||
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||||
</pre>
|
</pre>
|
||||||
In addition, in the "man" and HTML formats, there is a short page for each
|
In the "man" and HTML formats, there is also a short page for each C library
|
||||||
C library function, listing its arguments and results.
|
function, listing its arguments and results.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -195,9 +204,9 @@ two digits 10, at the domain cam.ac.uk.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 November 2012
|
Last updated: 08 January 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -44,26 +44,26 @@ man page, in case the conversion went wrong.
|
|||||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
@ -75,86 +75,86 @@ man page, in case the conversion went wrong.
|
|||||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const unsigned char *pcre16_maketables(void);</b>
|
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const char *pcre16_version(void);</b>
|
<b>const char *pcre16_version(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>void *(*pcre16_malloc)(size_t);</b>
|
<b>void *(*pcre16_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre16_free)(void *);</b>
|
<b>void (*pcre16_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre16_stack_free)(void *);</b>
|
<b>void (*pcre16_stack_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
||||||
@ -259,8 +259,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
||||||
<P>
|
<P>
|
||||||
The offsets within subject strings that are returned by the matching functions
|
The lengths and starting offsets of subject strings must be specified in 16-bit
|
||||||
are in 16-bit units rather than bytes.
|
data units, and the offsets within subject strings that are returned by the
|
||||||
|
matching functions are also in 16-bit units rather than bytes.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -374,9 +375,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 November 2012
|
Last updated: 12 May 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
382
tools/pcre/doc/html/pcre32.html
Normal file
382
tools/pcre/doc/html/pcre32.html
Normal file
@ -0,0 +1,382 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre32 specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre32 man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<ul>
|
||||||
|
<li><a name="TOC1" href="#SEC1">PCRE 32-BIT API BASIC FUNCTIONS</a>
|
||||||
|
<li><a name="TOC2" href="#SEC2">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a>
|
||||||
|
<li><a name="TOC3" href="#SEC3">PCRE 32-BIT API AUXILIARY FUNCTIONS</a>
|
||||||
|
<li><a name="TOC4" href="#SEC4">PCRE 32-BIT API INDIRECTED FUNCTIONS</a>
|
||||||
|
<li><a name="TOC5" href="#SEC5">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a>
|
||||||
|
<li><a name="TOC6" href="#SEC6">THE PCRE 32-BIT LIBRARY</a>
|
||||||
|
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
|
||||||
|
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
|
||||||
|
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
|
||||||
|
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
|
||||||
|
<li><a name="TOC11" href="#SEC11">32-BIT FUNCTIONS</a>
|
||||||
|
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
|
||||||
|
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
|
||||||
|
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
|
||||||
|
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
|
||||||
|
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
|
||||||
|
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
|
||||||
|
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
|
||||||
|
<li><a name="TOC19" href="#SEC19">TESTING</a>
|
||||||
|
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 32-BIT MODE</a>
|
||||||
|
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
|
||||||
|
<li><a name="TOC22" href="#SEC22">REVISION</a>
|
||||||
|
</ul>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||||
|
<b> const char **<i>errptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||||
|
<b> int <i>buffersize</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||||
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>const unsigned char *pcre32_maketables(void);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>const char *pcre32_version(void);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||||
|
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>void *(*pcre32_malloc)(size_t);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void (*pcre32_free)(void *);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void *(*pcre32_stack_malloc)(size_t);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>void (*pcre32_stack_free)(void *);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||||
|
<b> int <i>keep_boms</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
|
||||||
|
<P>
|
||||||
|
Starting with release 8.32, it is possible to compile a PCRE library that
|
||||||
|
supports 32-bit character strings, including UTF-32 strings, as well as or
|
||||||
|
instead of the original 8-bit library. This work was done by Christian Persch,
|
||||||
|
based on the work done by Zoltan Herczeg for the 16-bit library. All three
|
||||||
|
libraries contain identical sets of functions, used in exactly the same way.
|
||||||
|
Only the names of the functions and the data types of their arguments and
|
||||||
|
results are different. To avoid over-complication and reduce the documentation
|
||||||
|
maintenance load, most of the PCRE documentation describes the 8-bit library,
|
||||||
|
with only occasional references to the 16-bit and 32-bit libraries. This page
|
||||||
|
describes what is different when you use the 32-bit library.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
WARNING: A single application can be linked with all or any of the three
|
||||||
|
libraries, but you must take care when processing any particular pattern
|
||||||
|
to use functions from just one library. For example, if you want to study
|
||||||
|
a pattern that was compiled with <b>pcre32_compile()</b>, you must do so
|
||||||
|
with <b>pcre32_study()</b>, not <b>pcre_study()</b>, and you must free the
|
||||||
|
study data with <b>pcre32_free_study()</b>.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
|
||||||
|
<P>
|
||||||
|
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
|
||||||
|
functions in all libraries, as well as definitions of flags, structures, error
|
||||||
|
codes, etc.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
|
||||||
|
<P>
|
||||||
|
In Unix-like systems, the 32-bit library is called <b>libpcre32</b>, and can
|
||||||
|
normally be accessed by adding <b>-lpcre32</b> to the command for linking an
|
||||||
|
application that uses PCRE.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
|
||||||
|
<P>
|
||||||
|
In the 8-bit library, strings are passed to PCRE library functions as vectors
|
||||||
|
of bytes with the C type "char *". In the 32-bit library, strings are passed as
|
||||||
|
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
|
||||||
|
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
|
||||||
|
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
|
||||||
|
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
|
||||||
|
a 32-bit data type. If it is not, the build fails with an error message telling
|
||||||
|
the maintainer to modify the definition appropriately.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
|
||||||
|
<P>
|
||||||
|
The types of the opaque structures that are used for compiled 32-bit patterns
|
||||||
|
and JIT stacks are <b>pcre32</b> and <b>pcre32_jit_stack</b> respectively. The
|
||||||
|
type of the user-accessible structure that is returned by <b>pcre32_study()</b>
|
||||||
|
is <b>pcre32_extra</b>, and the type of the structure that is used for passing
|
||||||
|
data to a callout function is <b>pcre32_callout_block</b>. These structures
|
||||||
|
contain the same fields, with the same names, as their 8-bit counterparts. The
|
||||||
|
only difference is that pointers to character strings are 32-bit instead of
|
||||||
|
8-bit types.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC11" href="#TOC1">32-BIT FUNCTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
For every function in the 8-bit library there is a corresponding function in
|
||||||
|
the 32-bit library with a name that starts with <b>pcre32_</b> instead of
|
||||||
|
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
|
||||||
|
function, <b>pcre32_utf32_to_host_byte_order()</b>. This is a utility function
|
||||||
|
that converts a UTF-32 character string to host byte order if necessary. The
|
||||||
|
other 32-bit functions expect the strings they are passed to be in host byte
|
||||||
|
order.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>input</i> and <i>output</i> arguments of
|
||||||
|
<b>pcre32_utf32_to_host_byte_order()</b> may point to the same address, that is,
|
||||||
|
conversion in place is supported. The output buffer must be at least as long as
|
||||||
|
the input.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>length</i> argument specifies the number of 32-bit data units in the
|
||||||
|
input string; a negative value specifies a zero-terminated string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
|
||||||
|
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
|
||||||
|
string (commonly as the first character).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
|
||||||
|
points means that the input starts off in host byte order, otherwise the
|
||||||
|
opposite order is assumed. Again, BOMs in the string can change this. The final
|
||||||
|
byte order is passed back at the end of processing.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
|
||||||
|
into the output string. Otherwise they are discarded.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The result of the function is the number of 32-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
||||||
|
<P>
|
||||||
|
The lengths and starting offsets of subject strings must be specified in 32-bit
|
||||||
|
data units, and the offsets within subject strings that are returned by the
|
||||||
|
matching functions are also in 32-bit units rather than bytes.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||||
|
<P>
|
||||||
|
The name-to-number translation table that is maintained for named subpatterns
|
||||||
|
uses 32-bit characters. The <b>pcre32_get_stringtable_entries()</b> function
|
||||||
|
returns the length of each entry in the table as the number of 32-bit data
|
||||||
|
units.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
|
||||||
|
<P>
|
||||||
|
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
|
||||||
|
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
|
||||||
|
fact, these new options define the same bits in the options word. There is a
|
||||||
|
discussion about the
|
||||||
|
<a href="pcreunicode.html#utf32strings">validity of UTF-32 strings</a>
|
||||||
|
in the
|
||||||
|
<a href="pcreunicode.html"><b>pcreunicode</b></a>
|
||||||
|
page.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For the <b>pcre32_config()</b> function there is an option PCRE_CONFIG_UTF32
|
||||||
|
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
|
||||||
|
given to <b>pcre_config()</b> or <b>pcre16_config()</b>, or if the
|
||||||
|
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to <b>pcre32_config()</b>,
|
||||||
|
the result is the PCRE_ERROR_BADOPTION error.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
|
||||||
|
<P>
|
||||||
|
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
|
||||||
|
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
|
||||||
|
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
|
||||||
|
than 0xff can therefore be influenced by the locale in the same way as before.
|
||||||
|
Characters greater than 0xff have only one case, and no "type" (such as letter
|
||||||
|
or digit).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
|
||||||
|
the exception of values in the range 0xd800 to 0xdfff because those are
|
||||||
|
"surrogate" values that are ill-formed in UTF-32.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A UTF-32 string can indicate its endianness by a special code known as a
|
||||||
|
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
|
||||||
|
to be in host byte order. A utility function called
|
||||||
|
<b>pcre32_utf32_to_host_byte_order()</b> is provided to help with this (see
|
||||||
|
above).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
|
||||||
|
<P>
|
||||||
|
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
|
||||||
|
The error PCRE_ERROR_BADMODE is given when a compiled
|
||||||
|
pattern is passed to a function that processes patterns in the other
|
||||||
|
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
|
||||||
|
<b>pcre32_exec()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
|
||||||
|
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
|
||||||
|
are described in the section entitled
|
||||||
|
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
|
||||||
|
in the main
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page. The UTF-32 errors are:
|
||||||
|
<pre>
|
||||||
|
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
|
||||||
|
PCRE_UTF32_ERR2 Non-character
|
||||||
|
PCRE_UTF32_ERR3 Character > 0x10ffff
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
|
||||||
|
<P>
|
||||||
|
If there is an error while compiling a pattern, the error text that is passed
|
||||||
|
back by <b>pcre32_compile()</b> or <b>pcre32_compile2()</b> is still an 8-bit
|
||||||
|
character string, zero-terminated.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
|
||||||
|
<P>
|
||||||
|
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
|
||||||
|
a callout function point to 32-bit vectors.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
|
||||||
|
<P>
|
||||||
|
The <b>pcretest</b> program continues to operate with 8-bit input and output
|
||||||
|
files, but it can be used for testing the 32-bit library. If it is run with the
|
||||||
|
command line option <b>-32</b>, patterns and subject strings are converted from
|
||||||
|
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
|
||||||
|
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
|
||||||
|
8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
|
||||||
|
<b>pcretest</b> defaults to 32-bit and the <b>-32</b> option is ignored.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When PCRE is being built, the <b>RunTest</b> script that is called by "make
|
||||||
|
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
|
||||||
|
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 32-BIT MODE</a><br>
|
||||||
|
<P>
|
||||||
|
Not all the features of the 8-bit library are available with the 32-bit
|
||||||
|
library. The C++ and POSIX wrapper functions support only the 8-bit library,
|
||||||
|
and the <b>pcregrep</b> program is at present 8-bit only.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
|
||||||
|
<P>
|
||||||
|
Philip Hazel
|
||||||
|
<br>
|
||||||
|
University Computing Service
|
||||||
|
<br>
|
||||||
|
Cambridge CB2 3QH, England.
|
||||||
|
<br>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||||
|
<P>
|
||||||
|
Last updated: 12 May 2013
|
||||||
|
<br>
|
||||||
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
|
<br>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||||
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||||
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -22,13 +22,13 @@ SYNOPSIS
|
|||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
@ -65,6 +65,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -73,6 +74,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -23,16 +23,16 @@ SYNOPSIS
|
|||||||
<b> int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
@ -69,6 +69,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -77,6 +78,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -48,6 +48,7 @@ point to an unsigned long integer. The available codes are:
|
|||||||
target architecture for the JIT compiler,
|
target architecture for the JIT compiler,
|
||||||
or NULL if there is no JIT support
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
Internal recursion depth limit
|
Internal recursion depth limit
|
||||||
|
@ -23,14 +23,14 @@ SYNOPSIS
|
|||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
@ -22,13 +22,13 @@ SYNOPSIS
|
|||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
|
@ -23,14 +23,14 @@ SYNOPSIS
|
|||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
@ -50,16 +50,17 @@ are:
|
|||||||
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
<i>subject</i> Points to the subject string
|
<i>subject</i> Points to the subject string
|
||||||
<i>length</i> Length of the subject string, in bytes
|
<i>length</i> Length of the subject string
|
||||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
<i>startoffset</i> Offset in the subject at which to start matching
|
||||||
start matching
|
|
||||||
<i>options</i> Option bits
|
<i>options</i> Option bits
|
||||||
<i>ovector</i> Points to a vector of ints for result offsets
|
<i>ovector</i> Points to a vector of ints for result offsets
|
||||||
<i>ovecsize</i> Number of elements in the vector
|
<i>ovecsize</i> Number of elements in the vector
|
||||||
<i>workspace</i> Points to a vector of ints used as working space
|
<i>workspace</i> Points to a vector of ints used as working space
|
||||||
<i>wscount</i> Number of elements in the vector
|
<i>wscount</i> Number of elements in the vector
|
||||||
</pre>
|
</pre>
|
||||||
The options are:
|
The units for <i>length</i> and <i>startoffset</i> are bytes for
|
||||||
|
<b>pcre_dfa_exec()</b>, 16-bit data items for <b>pcre16_dfa_exec()</b>, and 32-bit items
|
||||||
|
for <b>pcre32_dfa_exec()</b>. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||||
|
@ -22,13 +22,13 @@ SYNOPSIS
|
|||||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
@ -45,14 +45,15 @@ offsets to captured substrings. Its arguments are:
|
|||||||
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
<i>subject</i> Points to the subject string
|
<i>subject</i> Points to the subject string
|
||||||
<i>length</i> Length of the subject string, in bytes
|
<i>length</i> Length of the subject string
|
||||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
<i>startoffset</i> Offset in the subject at which to start matching
|
||||||
start matching
|
|
||||||
<i>options</i> Option bits
|
<i>options</i> Option bits
|
||||||
<i>ovector</i> Points to a vector of ints for result offsets
|
<i>ovector</i> Points to a vector of ints for result offsets
|
||||||
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
|
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
|
||||||
</pre>
|
</pre>
|
||||||
The options are:
|
The units for <i>length</i> and <i>startoffset</i> are bytes for
|
||||||
|
<b>pcre_exec()</b>, 16-bit data items for <b>pcre16_exec()</b>, and 32-bit items
|
||||||
|
for <b>pcre32_exec()</b>. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -23,14 +23,14 @@ SYNOPSIS
|
|||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b> const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>name</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>name</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -22,13 +22,13 @@ SYNOPSIS
|
|||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -23,14 +23,14 @@ SYNOPSIS
|
|||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b> int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b> int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b> int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||||
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||||
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -21,12 +21,12 @@ SYNOPSIS
|
|||||||
<P>
|
<P>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
57
tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
Normal file
57
tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre_utf32_to_host_byte_order specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre_utf32_to_host_byte_order man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||||
|
from the original man page. If there is any nonsense in it, please consult the
|
||||||
|
man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||||
|
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||||
|
<b> int <i>keep_boms</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function, which exists only in the 32-bit library, converts a UTF-32
|
||||||
|
string to the correct order for the current host, taking account of any byte
|
||||||
|
order marks (BOMs) within the string. Its arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>output</i> pointer to output buffer, may be the same as <i>input</i>
|
||||||
|
<i>input</i> pointer to input buffer
|
||||||
|
<i>length</i> number of 32-bit units in the input, or negative for
|
||||||
|
a zero-terminated string
|
||||||
|
<i>host_byte_order</i> a NULL value or a non-zero value pointed to means
|
||||||
|
start in host byte order
|
||||||
|
<i>keep_boms</i> if non-zero, BOMs are copied to the output string
|
||||||
|
</pre>
|
||||||
|
The result of the function is the number of 32-bit units placed into the output
|
||||||
|
buffer, including the zero terminator if the string was zero-terminated.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
|
||||||
|
is current at the end of the string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE native API in the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
</p>
|
@ -48,26 +48,26 @@ man page, in case the conversion went wrong.
|
|||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
@ -79,39 +79,39 @@ man page, in case the conversion went wrong.
|
|||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b> const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
|
||||||
@ -120,52 +120,55 @@ man page, in case the conversion went wrong.
|
|||||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||||
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const unsigned char *pcre_maketables(void);</b>
|
<b>const unsigned char *pcre_maketables(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const char *pcre_version(void);</b>
|
<b>const char *pcre_version(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||||
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>void *(*pcre_malloc)(size_t);</b>
|
<b>void *(*pcre_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre_free)(void *);</b>
|
<b>void (*pcre_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void *(*pcre_stack_malloc)(size_t);</b>
|
<b>void *(*pcre_stack_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre_stack_free)(void *);</b>
|
<b>void (*pcre_stack_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int (*pcre_stack_guard)(void);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -187,10 +190,10 @@ by UTF16 or UTF32, respectively. This facility is in fact just cosmetic; the
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
References to bytes and UTF-8 in this document should be read as references to
|
References to bytes and UTF-8 in this document should be read as references to
|
||||||
16-bit data quantities and UTF-16 when using the 16-bit library, or 32-bit data
|
16-bit data units and UTF-16 when using the 16-bit library, or 32-bit data
|
||||||
quantities and UTF-32 when using the 32-bit library, unless specified
|
units and UTF-32 when using the 32-bit library, unless specified otherwise.
|
||||||
otherwise. More details of the specific differences for the 16-bit and 32-bit
|
More details of the specific differences for the 16-bit and 32-bit libraries
|
||||||
libraries are given in the
|
are given in the
|
||||||
<a href="pcre16.html"><b>pcre16</b></a>
|
<a href="pcre16.html"><b>pcre16</b></a>
|
||||||
and
|
and
|
||||||
<a href="pcre32.html"><b>pcre32</b></a>
|
<a href="pcre32.html"><b>pcre32</b></a>
|
||||||
@ -324,6 +327,15 @@ by the caller to a "callout" function, which PCRE will then call at specified
|
|||||||
points during a matching operation. Details are given in the
|
points during a matching operation. Details are given in the
|
||||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The global variable <b>pcre_stack_guard</b> initially contains NULL. It can be
|
||||||
|
set by the caller to a function that is called by PCRE whenever it starts
|
||||||
|
to compile a parenthesized part of a pattern. When parentheses are nested, PCRE
|
||||||
|
uses recursive function calls, which use up the system stack. This function is
|
||||||
|
provided so that applications with restricted stacks can force a compilation
|
||||||
|
error if the stack runs out. The function should return zero if all is well, or
|
||||||
|
non-zero to force an error.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
|
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -369,7 +381,8 @@ controlled in a similar way, but by separate options.
|
|||||||
The PCRE functions can be used in multi-threading applications, with the
|
The PCRE functions can be used in multi-threading applications, with the
|
||||||
proviso that the memory management functions pointed to by <b>pcre_malloc</b>,
|
proviso that the memory management functions pointed to by <b>pcre_malloc</b>,
|
||||||
<b>pcre_free</b>, <b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the
|
<b>pcre_free</b>, <b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the
|
||||||
callout function pointed to by <b>pcre_callout</b>, are shared by all threads.
|
callout and stack-checking functions pointed to by <b>pcre_callout</b> and
|
||||||
|
<b>pcre_stack_guard</b>, are shared by all threads.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The compiled form of a regular expression is not altered during matching, so
|
The compiled form of a regular expression is not altered during matching, so
|
||||||
@ -483,6 +496,16 @@ interface uses <b>malloc()</b> for output vectors. Further details are given in
|
|||||||
the
|
the
|
||||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
|
<pre>
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT
|
||||||
|
</pre>
|
||||||
|
The output is a long integer that gives the maximum depth of nesting of
|
||||||
|
parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
|
||||||
|
of system stack used when a pattern is compiled. It is specified when PCRE is
|
||||||
|
built; the default is 250. This limit does not take into account the stack that
|
||||||
|
may already be used by the calling application. For finer control over
|
||||||
|
compilation stack usage, you can set a pointer to an external checking function
|
||||||
|
in <b>pcre_stack_guard</b>.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_CONFIG_MATCH_LIMIT
|
PCRE_CONFIG_MATCH_LIMIT
|
||||||
</pre>
|
</pre>
|
||||||
@ -511,6 +534,8 @@ avoiding the use of the stack.
|
|||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b> int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
@ -558,16 +583,16 @@ Otherwise, if compilation of a pattern fails, <b>pcre_compile()</b> returns
|
|||||||
NULL, and sets the variable pointed to by <i>errptr</i> to point to a textual
|
NULL, and sets the variable pointed to by <i>errptr</i> to point to a textual
|
||||||
error message. This is a static string that is part of the library. You must
|
error message. This is a static string that is part of the library. You must
|
||||||
not try to free it. Normally, the offset from the start of the pattern to the
|
not try to free it. Normally, the offset from the start of the pattern to the
|
||||||
byte that was being processed when the error was discovered is placed in the
|
data unit that was being processed when the error was discovered is placed in
|
||||||
variable pointed to by <i>erroffset</i>, which must not be NULL (if it is, an
|
the variable pointed to by <i>erroffset</i>, which must not be NULL (if it is,
|
||||||
immediate error is given). However, for an invalid UTF-8 string, the offset is
|
an immediate error is given). However, for an invalid UTF-8 or UTF-16 string,
|
||||||
that of the first byte of the failing character.
|
the offset is that of the first data unit of the failing character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Some errors are not detected until the whole pattern has been scanned; in these
|
Some errors are not detected until the whole pattern has been scanned; in these
|
||||||
cases, the offset passed back is the length of the pattern. Note that the
|
cases, the offset passed back is the length of the pattern. Note that the
|
||||||
offset is in bytes, not characters, even in UTF-8 mode. It may sometimes point
|
offset is in data units, not characters, even in a UTF mode. It may sometimes
|
||||||
into the middle of a UTF-8 character.
|
point into the middle of a UTF-8 or UTF-16 character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the
|
If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the
|
||||||
@ -580,8 +605,9 @@ If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default set of
|
|||||||
character tables that are built when PCRE is compiled, using the default C
|
character tables that are built when PCRE is compiled, using the default C
|
||||||
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
|
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
|
||||||
call to <b>pcre_maketables()</b>. This value is stored with the compiled
|
call to <b>pcre_maketables()</b>. This value is stored with the compiled
|
||||||
pattern, and used again by <b>pcre_exec()</b>, unless another table pointer is
|
pattern, and used again by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> when the
|
||||||
passed to it. For more discussion, see the section on locale support below.
|
pattern is matched. For more discussion, see the section on locale support
|
||||||
|
below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
|
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
|
||||||
@ -666,12 +692,24 @@ documentation.
|
|||||||
<pre>
|
<pre>
|
||||||
PCRE_EXTENDED
|
PCRE_EXTENDED
|
||||||
</pre>
|
</pre>
|
||||||
If this bit is set, white space data characters in the pattern are totally
|
If this bit is set, most white space characters in the pattern are totally
|
||||||
ignored except when escaped or inside a character class. White space does not
|
ignored except when escaped or inside a character class. However, white space
|
||||||
include the VT character (code 11). In addition, characters between an
|
is not allowed within sequences such as (?> that introduce various
|
||||||
unescaped # outside a character class and the next newline, inclusive, are also
|
parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
|
||||||
ignored. This is equivalent to Perl's /x option, and it can be changed within a
|
However, ignorable white space is permitted between an item and a following
|
||||||
pattern by a (?x) option setting.
|
quantifier and between a quantifier and a following + that indicates
|
||||||
|
possessiveness.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
White space formerly did not include the VT character (code 11), because Perl
|
||||||
|
did not treat this character as white space. However, Perl changed at release
|
||||||
|
5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
PCRE_EXTENDED also causes characters between an unescaped # outside a character
|
||||||
|
class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
|
||||||
|
equivalent to Perl's /x option, and it can be changed within a pattern by a
|
||||||
|
(?x) option setting.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Which characters are interpreted as newlines is controlled by the options
|
Which characters are interpreted as newlines is controlled by the options
|
||||||
@ -741,12 +779,14 @@ binary zero character followed by z).
|
|||||||
<pre>
|
<pre>
|
||||||
PCRE_MULTILINE
|
PCRE_MULTILINE
|
||||||
</pre>
|
</pre>
|
||||||
By default, PCRE treats the subject string as consisting of a single line of
|
By default, for the purposes of matching "start of line" and "end of line",
|
||||||
characters (even if it actually contains newlines). The "start of line"
|
PCRE treats the subject string as consisting of a single line of characters,
|
||||||
metacharacter (^) matches only at the start of the string, while the "end of
|
even if it actually contains newlines. The "start of line" metacharacter (^)
|
||||||
line" metacharacter ($) matches only at the end of the string, or before a
|
matches only at the start of the string, and the "end of line" metacharacter
|
||||||
terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as
|
($) matches only at the end of the string, or before a terminating newline
|
||||||
Perl.
|
(except when PCRE_DOLLAR_ENDONLY is set). Note, however, that unless
|
||||||
|
PCRE_DOTALL is set, the "any character" metacharacter (.) does not match at a
|
||||||
|
newline. This behaviour (for ^, $, and dot) is the same as Perl.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
|
When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
|
||||||
@ -755,6 +795,15 @@ subject string, respectively, as well as at the very start and end. This is
|
|||||||
equivalent to Perl's /m option, and it can be changed within a pattern by a
|
equivalent to Perl's /m option, and it can be changed within a pattern by a
|
||||||
(?m) option setting. If there are no newlines in a subject string, or no
|
(?m) option setting. If there are no newlines in a subject string, or no
|
||||||
occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
|
occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
|
||||||
|
<pre>
|
||||||
|
PCRE_NEVER_UTF
|
||||||
|
</pre>
|
||||||
|
This option locks out interpretation of the pattern as UTF-8 (or UTF-16 or
|
||||||
|
UTF-32 in the 16-bit and 32-bit libraries). In particular, it prevents the
|
||||||
|
creator of the pattern from switching to UTF interpretation by starting the
|
||||||
|
pattern with (*UTF). This may be useful in applications that process patterns
|
||||||
|
from external sources. The combination of PCRE_UTF8 and PCRE_NEVER_UTF also
|
||||||
|
causes an error.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_NEWLINE_CR
|
PCRE_NEWLINE_CR
|
||||||
PCRE_NEWLINE_LF
|
PCRE_NEWLINE_LF
|
||||||
@ -814,12 +863,23 @@ were followed by ?: but named parentheses can still be used for capturing (and
|
|||||||
they acquire numbers in the usual way). There is no equivalent of this option
|
they acquire numbers in the usual way). There is no equivalent of this option
|
||||||
in Perl.
|
in Perl.
|
||||||
<pre>
|
<pre>
|
||||||
NO_START_OPTIMIZE
|
PCRE_NO_AUTO_POSSESS
|
||||||
|
</pre>
|
||||||
|
If this option is set, it disables "auto-possessification". This is an
|
||||||
|
optimization that, for example, turns a+b into a++b in order to avoid
|
||||||
|
backtracks into a+ that can never be successful. However, if callouts are in
|
||||||
|
use, auto-possessification means that some of them are never taken. You can set
|
||||||
|
this option if you want the matching functions to do a full unoptimized search
|
||||||
|
and run all the callouts, but it is mainly provided for testing purposes.
|
||||||
|
<pre>
|
||||||
|
PCRE_NO_START_OPTIMIZE
|
||||||
</pre>
|
</pre>
|
||||||
This is an option that acts at matching time; that is, it is really an option
|
This is an option that acts at matching time; that is, it is really an option
|
||||||
for <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. If it is set at compile time,
|
for <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. If it is set at compile time,
|
||||||
it is remembered with the compiled pattern and assumed at matching time. For
|
it is remembered with the compiled pattern and assumed at matching time. This
|
||||||
details see the discussion of PCRE_NO_START_OPTIMIZE
|
is necessary if you want to use JIT execution, because the JIT compiler needs
|
||||||
|
to know whether or not this option is set. For details see the discussion of
|
||||||
|
PCRE_NO_START_OPTIMIZE
|
||||||
<a href="#execoptions">below.</a>
|
<a href="#execoptions">below.</a>
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_UCP
|
PCRE_UCP
|
||||||
@ -862,10 +922,10 @@ page. If an invalid UTF-8 sequence is found, <b>pcre_compile()</b> returns an
|
|||||||
error. If you already know that your pattern is valid, and you want to skip
|
error. If you already know that your pattern is valid, and you want to skip
|
||||||
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
||||||
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
||||||
undefined. It may cause your program to crash. Note that this option can also
|
undefined. It may cause your program to crash or loop. Note that this option
|
||||||
be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress the
|
can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress
|
||||||
validity checking of subject strings only. If the same string is being matched
|
the validity checking of subject strings only. If the same string is being
|
||||||
many times, the option can be safely set for the second and subsequent
|
matched many times, the option can be safely set for the second and subsequent
|
||||||
matchings to improve performance.
|
matchings to improve performance.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
<br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||||
@ -910,7 +970,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
31 POSIX collating elements are not supported
|
31 POSIX collating elements are not supported
|
||||||
32 this version of PCRE is compiled without UTF support
|
32 this version of PCRE is compiled without UTF support
|
||||||
33 [this code is not in use]
|
33 [this code is not in use]
|
||||||
34 character value in \x{...} sequence is too large
|
34 character value in \x{} or \o{} is too large
|
||||||
35 invalid condition (?(0)
|
35 invalid condition (?(0)
|
||||||
36 \C not allowed in lookbehind assertion
|
36 \C not allowed in lookbehind assertion
|
||||||
37 PCRE does not support \L, \l, \N{name}, \U, or \u
|
37 PCRE does not support \L, \l, \N{name}, \U, or \u
|
||||||
@ -938,7 +998,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
name/number or by a plain number
|
name/number or by a plain number
|
||||||
58 a numbered reference must not be zero
|
58 a numbered reference must not be zero
|
||||||
59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
|
59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
|
||||||
60 (*VERB) not recognized
|
60 (*VERB) not recognized or malformed
|
||||||
61 number is too big
|
61 number is too big
|
||||||
62 subpattern name expected
|
62 subpattern name expected
|
||||||
63 digit expected after (?+
|
63 digit expected after (?+
|
||||||
@ -958,13 +1018,21 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
76 character value in \u.... sequence is too large
|
76 character value in \u.... sequence is too large
|
||||||
77 invalid UTF-32 string (specifically UTF-32)
|
77 invalid UTF-32 string (specifically UTF-32)
|
||||||
|
78 setting UTF is disabled by the application
|
||||||
|
79 non-hex character in \x{} (closing brace missing?)
|
||||||
|
80 non-octal character in \o{} (closing brace missing?)
|
||||||
|
81 missing opening brace after \o
|
||||||
|
82 parentheses are too deeply nested
|
||||||
|
83 invalid range in character class
|
||||||
|
84 group name must start with a non-digit
|
||||||
|
85 parentheses are too deeply nested (stack check)
|
||||||
</pre>
|
</pre>
|
||||||
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
||||||
be used if the limits were changed when PCRE was built.
|
be used if the limits were changed when PCRE was built.
|
||||||
<a name="studyingapattern"></a></P>
|
<a name="studyingapattern"></a></P>
|
||||||
<br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
|
<br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i></b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b> const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -1069,26 +1137,37 @@ In 32-bit mode, the bitmap is used for 32-bit values less than 256.)
|
|||||||
<P>
|
<P>
|
||||||
These two optimizations apply to both <b>pcre_exec()</b> and
|
These two optimizations apply to both <b>pcre_exec()</b> and
|
||||||
<b>pcre_dfa_exec()</b>, and the information is also used by the JIT compiler.
|
<b>pcre_dfa_exec()</b>, and the information is also used by the JIT compiler.
|
||||||
The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option
|
The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option.
|
||||||
when calling <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>, but if this is done,
|
You might want to do this if your pattern contains callouts or (*MARK) and you
|
||||||
JIT execution is also disabled. You might want to do this if your pattern
|
want to make use of these facilities in cases where matching fails.
|
||||||
contains callouts or (*MARK) and you want to make use of these facilities in
|
</P>
|
||||||
cases where matching fails. See the discussion of PCRE_NO_START_OPTIMIZE
|
<P>
|
||||||
|
PCRE_NO_START_OPTIMIZE can be specified at either compile time or execution
|
||||||
|
time. However, if PCRE_NO_START_OPTIMIZE is passed to <b>pcre_exec()</b> (that
|
||||||
|
is, after any JIT compilation has happened), JIT execution is disabled. For JIT
|
||||||
|
execution to work with PCRE_NO_START_OPTIMIZE, the option must be set at
|
||||||
|
compile time.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a longer discussion of PCRE_NO_START_OPTIMIZE
|
||||||
<a href="#execoptions">below.</a>
|
<a href="#execoptions">below.</a>
|
||||||
<a name="localesupport"></a></P>
|
<a name="localesupport"></a></P>
|
||||||
<br><a name="SEC14" href="#TOC1">LOCALE SUPPORT</a><br>
|
<br><a name="SEC14" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE handles caseless matching, and determines whether characters are letters,
|
PCRE handles caseless matching, and determines whether characters are letters,
|
||||||
digits, or whatever, by reference to a set of tables, indexed by character
|
digits, or whatever, by reference to a set of tables, indexed by character
|
||||||
value. When running in UTF-8 mode, this applies only to characters
|
code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
|
||||||
with codes less than 128. By default, higher-valued codes never match escapes
|
applies only to characters with code points less than 256. By default,
|
||||||
such as \w or \d, but they can be tested with \p if PCRE is built with
|
higher-valued code points never match escapes such as \w or \d. However, if
|
||||||
Unicode character property support. Alternatively, the PCRE_UCP option can be
|
PCRE is built with Unicode property support, all characters can be tested with
|
||||||
set at compile time; this causes \w and friends to use Unicode property
|
\p and \P, or, alternatively, the PCRE_UCP option can be set when a pattern
|
||||||
support instead of built-in tables. The use of locales with Unicode is
|
is compiled; this causes \w and friends to use Unicode property support
|
||||||
discouraged. If you are handling characters with codes greater than 128, you
|
instead of the built-in tables.
|
||||||
should either use UTF-8 and Unicode, or use locales, but not try to mix the
|
</P>
|
||||||
two.
|
<P>
|
||||||
|
The use of locales with Unicode is discouraged. If you are handling characters
|
||||||
|
with code points greater than 128, you should either use Unicode support, or
|
||||||
|
use locales, but not try to mix the two.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE contains an internal set of tables that are used when the final argument
|
PCRE contains an internal set of tables that are used when the final argument
|
||||||
@ -1106,10 +1185,10 @@ for this locale support is expected to die away.
|
|||||||
<P>
|
<P>
|
||||||
External tables are built by calling the <b>pcre_maketables()</b> function,
|
External tables are built by calling the <b>pcre_maketables()</b> function,
|
||||||
which has no arguments, in the relevant locale. The result can then be passed
|
which has no arguments, in the relevant locale. The result can then be passed
|
||||||
to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
|
to <b>pcre_compile()</b> as often as necessary. For example, to build and use
|
||||||
example, to build and use tables that are appropriate for the French locale
|
tables that are appropriate for the French locale (where accented characters
|
||||||
(where accented characters with values greater than 128 are treated as letters),
|
with values greater than 128 are treated as letters), the following code could
|
||||||
the following code could be used:
|
be used:
|
||||||
<pre>
|
<pre>
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
tables = pcre_maketables();
|
tables = pcre_maketables();
|
||||||
@ -1127,16 +1206,20 @@ needed.
|
|||||||
<P>
|
<P>
|
||||||
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
|
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
|
||||||
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
|
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
|
||||||
and normally also by <b>pcre_exec()</b>. Thus, by default, for any single
|
and also by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>. Thus, for any single
|
||||||
pattern, compilation, studying and matching all happen in the same locale, but
|
pattern, compilation, studying and matching all happen in the same locale, but
|
||||||
different patterns can be compiled in different locales.
|
different patterns can be processed in different locales.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is possible to pass a table pointer or NULL (indicating the use of the
|
It is possible to pass a table pointer or NULL (indicating the use of the
|
||||||
internal tables) to <b>pcre_exec()</b>. Although not intended for this purpose,
|
internal tables) to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> (see the
|
||||||
this facility could be used to match a pattern in a different locale from the
|
discussion below in the section on matching a pattern). This facility is
|
||||||
one in which it was compiled. Passing table pointers at run time is discussed
|
provided for use with pre-compiled patterns that have been saved and reloaded.
|
||||||
below in the section on matching a pattern.
|
Character tables are not saved with patterns, so if a non-standard table was
|
||||||
|
used at compile time, it must be provided again when the reloaded pattern is
|
||||||
|
matched. Attempting to use this facility to match a pattern in a different
|
||||||
|
locale from the one in which it was compiled is likely to lead to anomalous
|
||||||
|
(usually incorrect) results.
|
||||||
<a name="infoaboutpattern"></a></P>
|
<a name="infoaboutpattern"></a></P>
|
||||||
<br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
|
<br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -1162,6 +1245,7 @@ the following negative numbers:
|
|||||||
PCRE_ERROR_BADENDIANNESS the pattern was compiled with different
|
PCRE_ERROR_BADENDIANNESS the pattern was compiled with different
|
||||||
endianness
|
endianness
|
||||||
PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid
|
PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid
|
||||||
|
PCRE_ERROR_UNSET the requested field is not set
|
||||||
</pre>
|
</pre>
|
||||||
The "magic number" is placed at the start of each compiled pattern as a simple
|
The "magic number" is placed at the start of each compiled pattern as a simple
|
||||||
check against passing an arbitrary memory pointer. The endianness error can
|
check against passing an arbitrary memory pointer. The endianness error can
|
||||||
@ -1199,12 +1283,15 @@ information call is provided for internal use by the <b>pcre_study()</b>
|
|||||||
function. External callers can cause PCRE to use its internal tables by passing
|
function. External callers can cause PCRE to use its internal tables by passing
|
||||||
a NULL table pointer.
|
a NULL table pointer.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_FIRSTBYTE
|
PCRE_INFO_FIRSTBYTE (deprecated)
|
||||||
</pre>
|
</pre>
|
||||||
Return information about the first data unit of any matched string, for a
|
Return information about the first data unit of any matched string, for a
|
||||||
non-anchored pattern. (The name of this option refers to the 8-bit library,
|
non-anchored pattern. The name of this option refers to the 8-bit library,
|
||||||
where data units are bytes.) The fourth argument should point to an <b>int</b>
|
where data units are bytes. The fourth argument should point to an <b>int</b>
|
||||||
variable.
|
variable. Negative values are used for special cases. However, this means that
|
||||||
|
when the 32-bit library is in non-UTF-32 mode, the full 32-bit range of
|
||||||
|
characters cannot be returned. For this reason, this value is deprecated; use
|
||||||
|
PCRE_INFO_FIRSTCHARACTERFLAGS and PCRE_INFO_FIRSTCHARACTER instead.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If there is a fixed first value, for example, the letter "c" from a pattern
|
If there is a fixed first value, for example, the letter "c" from a pattern
|
||||||
@ -1227,12 +1314,43 @@ starts with "^", or
|
|||||||
-1 is returned, indicating that the pattern matches only at the start of a
|
-1 is returned, indicating that the pattern matches only at the start of a
|
||||||
subject string or after any newline within the string. Otherwise -2 is
|
subject string or after any newline within the string. Otherwise -2 is
|
||||||
returned. For anchored patterns, -2 is returned.
|
returned. For anchored patterns, -2 is returned.
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_FIRSTCHARACTER
|
||||||
|
</pre>
|
||||||
|
Return the value of the first data unit (non-UTF character) of any matched
|
||||||
|
string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1;
|
||||||
|
otherwise return 0. The fourth argument should point to an <b>uint_t</b>
|
||||||
|
variable.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
||||||
to return the full 32-bit range of the character, this value is deprecated;
|
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
||||||
instead the PCRE_INFO_FIRSTCHARACTERFLAGS and PCRE_INFO_FIRSTCHARACTER values
|
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
||||||
should be used.
|
<pre>
|
||||||
|
PCRE_INFO_FIRSTCHARACTERFLAGS
|
||||||
|
</pre>
|
||||||
|
Return information about the first data unit of any matched string, for a
|
||||||
|
non-anchored pattern. The fourth argument should point to an <b>int</b>
|
||||||
|
variable.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If there is a fixed first value, for example, the letter "c" from a pattern
|
||||||
|
such as (cat|cow|coyote), 1 is returned, and the character value can be
|
||||||
|
retrieved using PCRE_INFO_FIRSTCHARACTER. If there is no fixed first value, and
|
||||||
|
if either
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
||||||
|
starts with "^", or
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
||||||
|
(if it were set, the pattern would be anchored),
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
2 is returned, indicating that the pattern matches only at the start of a
|
||||||
|
subject string or after any newline within the string. Otherwise 0 is
|
||||||
|
returned. For anchored patterns, 0 is returned.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_FIRSTTABLE
|
PCRE_INFO_FIRSTTABLE
|
||||||
</pre>
|
</pre>
|
||||||
@ -1281,26 +1399,43 @@ is -1.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
||||||
to return the full 32-bit range of the character, this value is deprecated;
|
to return the full 32-bit range of characters, this value is deprecated;
|
||||||
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
||||||
be used.
|
be used.
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_MATCH_EMPTY
|
||||||
|
</pre>
|
||||||
|
Return 1 if the pattern can match an empty string, otherwise 0. The fourth
|
||||||
|
argument should point to an <b>int</b> variable.
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_MATCHLIMIT
|
||||||
|
</pre>
|
||||||
|
If the pattern set a match limit by including an item of the form
|
||||||
|
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The fourth argument
|
||||||
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
|
call to <b>pcre_fullinfo()</b> returns the error PCRE_ERROR_UNSET.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_MAXLOOKBEHIND
|
PCRE_INFO_MAXLOOKBEHIND
|
||||||
</pre>
|
</pre>
|
||||||
Return the number of characters (NB not bytes) in the longest lookbehind
|
Return the number of characters (NB not data units) in the longest lookbehind
|
||||||
assertion in the pattern. Note that the simple assertions \b and \B require a
|
assertion in the pattern. This information is useful when doing multi-segment
|
||||||
one-character lookbehind. This information is useful when doing multi-segment
|
matching using the partial matching facilities. Note that the simple assertions
|
||||||
matching using the partial matching facilities.
|
\b and \B require a one-character lookbehind. \A also registers a
|
||||||
|
one-character lookbehind, though it does not actually inspect the previous
|
||||||
|
character. This is to ensure that at least one character from the old segment
|
||||||
|
is retained when a new segment is processed. Otherwise, if there are no
|
||||||
|
lookbehinds in the pattern, \A might match incorrectly at the start of a new
|
||||||
|
segment.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_MINLENGTH
|
PCRE_INFO_MINLENGTH
|
||||||
</pre>
|
</pre>
|
||||||
If the pattern was studied and a minimum length for matching subject strings
|
If the pattern was studied and a minimum length for matching subject strings
|
||||||
was computed, its value is returned. Otherwise the returned value is -1. The
|
was computed, its value is returned. Otherwise the returned value is -1. The
|
||||||
value is a number of characters, which in UTF-8 mode may be different from the
|
value is a number of characters, which in UTF mode may be different from the
|
||||||
number of bytes. The fourth argument should point to an <b>int</b> variable. A
|
number of data units. The fourth argument should point to an <b>int</b>
|
||||||
non-negative value is a lower bound to the length of any matching string. There
|
variable. A non-negative value is a lower bound to the length of any matching
|
||||||
may not be any strings of that length that do actually match, but every string
|
string. There may not be any strings of that length that do actually match, but
|
||||||
that does match is at least that long.
|
every string that does match is at least that long.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_NAMECOUNT
|
PCRE_INFO_NAMECOUNT
|
||||||
PCRE_INFO_NAMEENTRYSIZE
|
PCRE_INFO_NAMEENTRYSIZE
|
||||||
@ -1324,22 +1459,24 @@ length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
|
|||||||
entry of the table. This is a pointer to <b>char</b> in the 8-bit library, where
|
entry of the table. This is a pointer to <b>char</b> in the 8-bit library, where
|
||||||
the first two bytes of each entry are the number of the capturing parenthesis,
|
the first two bytes of each entry are the number of the capturing parenthesis,
|
||||||
most significant byte first. In the 16-bit library, the pointer points to
|
most significant byte first. In the 16-bit library, the pointer points to
|
||||||
16-bit data units, the first of which contains the parenthesis number.
|
16-bit data units, the first of which contains the parenthesis number. In the
|
||||||
In the 32-bit library, the pointer points to 32-bit data units, the first of
|
32-bit library, the pointer points to 32-bit data units, the first of which
|
||||||
which contains the parenthesis number. The rest
|
contains the parenthesis number. The rest of the entry is the corresponding
|
||||||
of the entry is the corresponding name, zero terminated.
|
name, zero terminated.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The names are in alphabetical order. Duplicate names may appear if (?| is used
|
The names are in alphabetical order. If (?| is used to create multiple groups
|
||||||
to create multiple groups with the same number, as described in the
|
with the same number, as described in the
|
||||||
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
|
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
page. Duplicate names for subpatterns with different numbers are permitted only
|
page, the groups may be given the same name, but there is only one entry in the
|
||||||
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
|
table. Different names for groups of the same number are not permitted.
|
||||||
table in the order in which they were found in the pattern. In the absence of
|
Duplicate names for subpatterns with different numbers are permitted,
|
||||||
(?| this is the order of increasing number; when (?| is used this is not
|
but only if PCRE_DUPNAMES is set. They appear in the table in the order in
|
||||||
necessarily the case because later subpatterns may have lower numbers.
|
which they were found in the pattern. In the absence of (?| this is the order
|
||||||
|
of increasing number; when (?| is used this is not necessarily the case because
|
||||||
|
later subpatterns may have lower numbers.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
As a simple example of the name/number table, consider the following pattern
|
As a simple example of the name/number table, consider the following pattern
|
||||||
@ -1391,10 +1528,17 @@ alternatives begin with one of the following:
|
|||||||
</pre>
|
</pre>
|
||||||
For such patterns, the PCRE_ANCHORED bit is set in the options returned by
|
For such patterns, the PCRE_ANCHORED bit is set in the options returned by
|
||||||
<b>pcre_fullinfo()</b>.
|
<b>pcre_fullinfo()</b>.
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_RECURSIONLIMIT
|
||||||
|
</pre>
|
||||||
|
If the pattern set a recursion limit by including an item of the form
|
||||||
|
(*LIMIT_RECURSION=nnnn) at the start, the value is returned. The fourth
|
||||||
|
argument should point to an unsigned 32-bit integer. If no such value has been
|
||||||
|
set, the call to <b>pcre_fullinfo()</b> returns the error PCRE_ERROR_UNSET.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_SIZE
|
PCRE_INFO_SIZE
|
||||||
</pre>
|
</pre>
|
||||||
Return the size of the compiled pattern in bytes (for both libraries). The
|
Return the size of the compiled pattern in bytes (for all three libraries). The
|
||||||
fourth argument should point to a <b>size_t</b> variable. This value does not
|
fourth argument should point to a <b>size_t</b> variable. This value does not
|
||||||
include the size of the <b>pcre</b> structure that is returned by
|
include the size of the <b>pcre</b> structure that is returned by
|
||||||
<b>pcre_compile()</b>. The value that is passed as the argument to
|
<b>pcre_compile()</b>. The value that is passed as the argument to
|
||||||
@ -1405,70 +1549,17 @@ does not alter the value returned by this option.
|
|||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_STUDYSIZE
|
PCRE_INFO_STUDYSIZE
|
||||||
</pre>
|
</pre>
|
||||||
Return the size in bytes of the data block pointed to by the <i>study_data</i>
|
Return the size in bytes (for all three libraries) of the data block pointed to
|
||||||
field in a <b>pcre_extra</b> block. If <b>pcre_extra</b> is NULL, or there is no
|
by the <i>study_data</i> field in a <b>pcre_extra</b> block. If <b>pcre_extra</b>
|
||||||
study data, zero is returned. The fourth argument should point to a
|
is NULL, or there is no study data, zero is returned. The fourth argument
|
||||||
<b>size_t</b> variable. The <i>study_data</i> field is set by <b>pcre_study()</b>
|
should point to a <b>size_t</b> variable. The <i>study_data</i> field is set by
|
||||||
to record information that will speed up matching (see the section entitled
|
<b>pcre_study()</b> to record information that will speed up matching (see the
|
||||||
|
section entitled
|
||||||
<a href="#studyingapattern">"Studying a pattern"</a>
|
<a href="#studyingapattern">"Studying a pattern"</a>
|
||||||
above). The format of the <i>study_data</i> block is private, but its length
|
above). The format of the <i>study_data</i> block is private, but its length
|
||||||
is made available via this option so that it can be saved and restored (see the
|
is made available via this option so that it can be saved and restored (see the
|
||||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||||
documentation for details).
|
documentation for details).
|
||||||
<pre>
|
|
||||||
PCRE_INFO_FIRSTCHARACTERFLAGS
|
|
||||||
</pre>
|
|
||||||
Return information about the first data unit of any matched string, for a
|
|
||||||
non-anchored pattern. The fourth argument should point to an <b>int</b>
|
|
||||||
variable.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If there is a fixed first value, for example, the letter "c" from a pattern
|
|
||||||
such as (cat|cow|coyote), 1 is returned, and the character value can be
|
|
||||||
retrieved using PCRE_INFO_FIRSTCHARACTER.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If there is no fixed first value, and if either
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
|
||||||
starts with "^", or
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
|
||||||
(if it were set, the pattern would be anchored),
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
2 is returned, indicating that the pattern matches only at the start of a
|
|
||||||
subject string or after any newline within the string. Otherwise 0 is
|
|
||||||
returned. For anchored patterns, 0 is returned.
|
|
||||||
<pre>
|
|
||||||
PCRE_INFO_FIRSTCHARACTER
|
|
||||||
</pre>
|
|
||||||
Return the fixed first character value, if PCRE_INFO_FIRSTCHARACTERFLAGS
|
|
||||||
returned 1; otherwise returns 0. The fourth argument should point to an
|
|
||||||
<b>uint_t</b> variable.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
|
||||||
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
|
||||||
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If there is no fixed first value, and if either
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
|
||||||
starts with "^", or
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
|
||||||
(if it were set, the pattern would be anchored),
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
-1 is returned, indicating that the pattern matches only at the start of a
|
|
||||||
subject string or after any newline within the string. Otherwise -2 is
|
|
||||||
returned. For anchored patterns, -2 is returned.
|
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_REQUIREDCHARFLAGS
|
PCRE_INFO_REQUIREDCHARFLAGS
|
||||||
</pre>
|
</pre>
|
||||||
@ -1634,6 +1725,16 @@ the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
|
|||||||
PCRE_ERROR_MATCHLIMIT.
|
PCRE_ERROR_MATCHLIMIT.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
A value for the match limit may also be supplied by an item at the start of a
|
||||||
|
pattern of the form
|
||||||
|
<pre>
|
||||||
|
(*LIMIT_MATCH=d)
|
||||||
|
</pre>
|
||||||
|
where d is a decimal number. However, such a setting is ignored unless d is
|
||||||
|
less than the limit set by the caller of <b>pcre_exec()</b> or, if no such limit
|
||||||
|
is set, less than the default.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The <i>match_limit_recursion</i> field is similar to <i>match_limit</i>, but
|
The <i>match_limit_recursion</i> field is similar to <i>match_limit</i>, but
|
||||||
instead of limiting the total number of times that <b>match()</b> is called, it
|
instead of limiting the total number of times that <b>match()</b> is called, it
|
||||||
limits the depth of recursion. The recursion depth is a smaller number than the
|
limits the depth of recursion. The recursion depth is a smaller number than the
|
||||||
@ -1655,23 +1756,38 @@ PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
|
|||||||
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
|
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
A value for the recursion limit may also be supplied by an item at the start of
|
||||||
|
a pattern of the form
|
||||||
|
<pre>
|
||||||
|
(*LIMIT_RECURSION=d)
|
||||||
|
</pre>
|
||||||
|
where d is a decimal number. However, such a setting is ignored unless d is
|
||||||
|
less than the limit set by the caller of <b>pcre_exec()</b> or, if no such limit
|
||||||
|
is set, less than the default.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The <i>callout_data</i> field is used in conjunction with the "callout" feature,
|
The <i>callout_data</i> field is used in conjunction with the "callout" feature,
|
||||||
and is described in the
|
and is described in the
|
||||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>tables</i> field is used to pass a character tables pointer to
|
The <i>tables</i> field is provided for use with patterns that have been
|
||||||
<b>pcre_exec()</b>; this overrides the value that is stored with the compiled
|
pre-compiled using custom character tables, saved to disc or elsewhere, and
|
||||||
pattern. A non-NULL value is stored with the compiled pattern only if custom
|
then reloaded, because the tables that were used to compile a pattern are not
|
||||||
tables were supplied to <b>pcre_compile()</b> via its <i>tableptr</i> argument.
|
saved with it. See the
|
||||||
If NULL is passed to <b>pcre_exec()</b> using this mechanism, it forces PCRE's
|
|
||||||
internal tables to be used. This facility is helpful when re-using patterns
|
|
||||||
that have been saved after compiling with an external set of tables, because
|
|
||||||
the external tables might be at a different address when <b>pcre_exec()</b> is
|
|
||||||
called. See the
|
|
||||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||||
documentation for a discussion of saving compiled patterns for later use.
|
documentation for a discussion of saving compiled patterns for later use. If
|
||||||
|
NULL is passed using this mechanism, it forces PCRE's internal tables to be
|
||||||
|
used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>Warning:</b> The tables that <b>pcre_exec()</b> uses must be the same as those
|
||||||
|
that were used when the pattern was compiled. If this is not the case, the
|
||||||
|
behaviour of <b>pcre_exec()</b> is undefined. Therefore, when a pattern is
|
||||||
|
compiled and matched in the same process, this field should never be set. In
|
||||||
|
this (the most common) case, the correct table pointer is automatically passed
|
||||||
|
with the compiled pattern from <b>pcre_compile()</b> to <b>pcre_exec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
|
If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
|
||||||
@ -1816,10 +1932,10 @@ unanchored match must start with a specific character, it searches the subject
|
|||||||
for that character, and fails immediately if it cannot find it, without
|
for that character, and fails immediately if it cannot find it, without
|
||||||
actually running the main matching function. This means that a special item
|
actually running the main matching function. This means that a special item
|
||||||
such as (*COMMIT) at the start of a pattern is not considered until after a
|
such as (*COMMIT) at the start of a pattern is not considered until after a
|
||||||
suitable starting point for the match has been found. When callouts or (*MARK)
|
suitable starting point for the match has been found. Also, when callouts or
|
||||||
items are in use, these "start-up" optimizations can cause them to be skipped
|
(*MARK) items are in use, these "start-up" optimizations can cause them to be
|
||||||
if the pattern is never actually used. The start-up optimizations are in effect
|
skipped if the pattern is never actually used. The start-up optimizations are
|
||||||
a pre-scan of the subject that takes place before the pattern is run.
|
in effect a pre-scan of the subject that takes place before the pattern is run.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly
|
The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly
|
||||||
@ -1827,8 +1943,9 @@ causing performance to suffer, but ensuring that in cases where the result is
|
|||||||
"no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
|
"no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
|
||||||
are considered at every possible starting position in the subject string. If
|
are considered at every possible starting position in the subject string. If
|
||||||
PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
|
PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
|
||||||
time. The use of PCRE_NO_START_OPTIMIZE disables JIT execution; when it is set,
|
time. The use of PCRE_NO_START_OPTIMIZE at matching time (that is, passing it
|
||||||
matching is always done interpretively.
|
to <b>pcre_exec()</b>) disables JIT execution; in this situation, matching is
|
||||||
|
always done interpretively.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
|
Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
|
||||||
@ -1888,7 +2005,7 @@ all the matches in a single subject string. However, you should be sure that
|
|||||||
the value of <i>startoffset</i> points to the start of a character (or the end
|
the value of <i>startoffset</i> points to the start of a character (or the end
|
||||||
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
||||||
invalid string as a subject or an invalid value of <i>startoffset</i> is
|
invalid string as a subject or an invalid value of <i>startoffset</i> is
|
||||||
undefined. Your program may crash.
|
undefined. Your program may crash or loop.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_PARTIAL_HARD
|
PCRE_PARTIAL_HARD
|
||||||
PCRE_PARTIAL_SOFT
|
PCRE_PARTIAL_SOFT
|
||||||
@ -1922,13 +2039,19 @@ The string to be matched by <b>pcre_exec()</b>
|
|||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The subject string is passed to <b>pcre_exec()</b> as a pointer in
|
The subject string is passed to <b>pcre_exec()</b> as a pointer in
|
||||||
<i>subject</i>, a length in bytes in <i>length</i>, and a starting byte offset
|
<i>subject</i>, a length in <i>length</i>, and a starting offset in
|
||||||
in <i>startoffset</i>. If this is negative or greater than the length of the
|
<i>startoffset</i>. The units for <i>length</i> and <i>startoffset</i> are bytes
|
||||||
subject, <b>pcre_exec()</b> returns PCRE_ERROR_BADOFFSET. When the starting
|
for the 8-bit library, 16-bit data items for the 16-bit library, and 32-bit
|
||||||
offset is zero, the search for a match starts at the beginning of the subject,
|
data items for the 32-bit library.
|
||||||
and this is by far the most common case. In UTF-8 mode, the byte offset must
|
</P>
|
||||||
point to the start of a UTF-8 character (or the end of the subject). Unlike the
|
<P>
|
||||||
pattern string, the subject may contain binary zero bytes.
|
If <i>startoffset</i> is negative or greater than the length of the subject,
|
||||||
|
<b>pcre_exec()</b> returns PCRE_ERROR_BADOFFSET. When the starting offset is
|
||||||
|
zero, the search for a match starts at the beginning of the subject, and this
|
||||||
|
is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point
|
||||||
|
to the start of a character, or the end of the subject (in UTF-32 mode, one
|
||||||
|
data unit equals one character, so all offsets are valid). Unlike the pattern
|
||||||
|
string, the subject may contain binary zeroes.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A non-zero starting offset is useful when searching for another match in the
|
A non-zero starting offset is useful when searching for another match in the
|
||||||
@ -1996,10 +2119,12 @@ rounded down.
|
|||||||
When a match is successful, information about captured substrings is returned
|
When a match is successful, information about captured substrings is returned
|
||||||
in pairs of integers, starting at the beginning of <i>ovector</i>, and
|
in pairs of integers, starting at the beginning of <i>ovector</i>, and
|
||||||
continuing up to two-thirds of its length at the most. The first element of
|
continuing up to two-thirds of its length at the most. The first element of
|
||||||
each pair is set to the byte offset of the first character in a substring, and
|
each pair is set to the offset of the first character in a substring, and the
|
||||||
the second is set to the byte offset of the first character after the end of a
|
second is set to the offset of the first character after the end of a
|
||||||
substring. <b>Note</b>: these values are always byte offsets, even in UTF-8
|
substring. These values are always data unit offsets, even in UTF mode. They
|
||||||
mode. They are not character counts.
|
are byte offsets in the 8-bit library, 16-bit data item offsets in the 16-bit
|
||||||
|
library, and 32-bit data item offsets in the 32-bit library. <b>Note</b>: they
|
||||||
|
are not character counts.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The first pair of integers, <i>ovector[0]</i> and <i>ovector[1]</i>, identify the
|
The first pair of integers, <i>ovector[0]</i> and <i>ovector[1]</i>, identify the
|
||||||
@ -2332,23 +2457,25 @@ character.
|
|||||||
The first byte of a character has the value 0xfe or 0xff. These values can
|
The first byte of a character has the value 0xfe or 0xff. These values can
|
||||||
never occur in a valid UTF-8 string.
|
never occur in a valid UTF-8 string.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_UTF8_ERR2
|
PCRE_UTF8_ERR22
|
||||||
</pre>
|
</pre>
|
||||||
Non-character. These are the last two characters in each plane (0xfffe, 0xffff,
|
This error code was formerly used when the presence of a so-called
|
||||||
0x1fffe, 0x1ffff .. 0x10fffe, 0x10ffff), and the characters 0xfdd0..0xfdef.
|
"non-character" caused an error. Unicode corrigendum #9 makes it clear that
|
||||||
|
such characters should not cause a string to be rejected, and so this code is
|
||||||
|
no longer in use and is never returned.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
<br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b> int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b> const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
@ -2436,14 +2563,14 @@ provided.
|
|||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
@ -2711,6 +2838,15 @@ matching string is given first. If there were too many matches to fit into
|
|||||||
the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
|
the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
|
||||||
the entire <i>ovector</i> for returning matched strings.
|
the entire <i>ovector</i> for returning matched strings.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
NOTE: PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Error returns from <b>pcre_dfa_exec()</b>
|
Error returns from <b>pcre_dfa_exec()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -2777,9 +2913,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 November 2012
|
Last updated: 09 February 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -13,46 +13,63 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||||||
man page, in case the conversion went wrong.
|
man page, in case the conversion went wrong.
|
||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
|
<li><a name="TOC1" href="#SEC1">BUILDING PCRE</a>
|
||||||
<li><a name="TOC2" href="#SEC2">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
<li><a name="TOC2" href="#SEC2">PCRE BUILD-TIME OPTIONS</a>
|
||||||
<li><a name="TOC3" href="#SEC3">BUILDING SHARED AND STATIC LIBRARIES</a>
|
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC4" href="#SEC4">C++ SUPPORT</a>
|
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||||
<li><a name="TOC5" href="#SEC5">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
|
<li><a name="TOC5" href="#SEC5">C++ SUPPORT</a>
|
||||||
<li><a name="TOC6" href="#SEC6">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
<li><a name="TOC6" href="#SEC6">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
|
||||||
<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
|
<li><a name="TOC7" href="#SEC7">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
||||||
<li><a name="TOC8" href="#SEC8">CODE VALUE OF NEWLINE</a>
|
<li><a name="TOC8" href="#SEC8">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||||
<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
|
<li><a name="TOC9" href="#SEC9">CODE VALUE OF NEWLINE</a>
|
||||||
<li><a name="TOC10" href="#SEC10">POSIX MALLOC USAGE</a>
|
<li><a name="TOC10" href="#SEC10">WHAT \R MATCHES</a>
|
||||||
<li><a name="TOC11" href="#SEC11">HANDLING VERY LARGE PATTERNS</a>
|
<li><a name="TOC11" href="#SEC11">POSIX MALLOC USAGE</a>
|
||||||
<li><a name="TOC12" href="#SEC12">AVOIDING EXCESSIVE STACK USAGE</a>
|
<li><a name="TOC12" href="#SEC12">HANDLING VERY LARGE PATTERNS</a>
|
||||||
<li><a name="TOC13" href="#SEC13">LIMITING PCRE RESOURCE USAGE</a>
|
<li><a name="TOC13" href="#SEC13">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||||
<li><a name="TOC14" href="#SEC14">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
<li><a name="TOC14" href="#SEC14">LIMITING PCRE RESOURCE USAGE</a>
|
||||||
<li><a name="TOC15" href="#SEC15">USING EBCDIC CODE</a>
|
<li><a name="TOC15" href="#SEC15">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||||
<li><a name="TOC16" href="#SEC16">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
<li><a name="TOC16" href="#SEC16">USING EBCDIC CODE</a>
|
||||||
<li><a name="TOC17" href="#SEC17">PCREGREP BUFFER SIZE</a>
|
<li><a name="TOC17" href="#SEC17">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||||
<li><a name="TOC18" href="#SEC18">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
<li><a name="TOC18" href="#SEC18">PCREGREP BUFFER SIZE</a>
|
||||||
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
|
<li><a name="TOC19" href="#SEC19">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||||
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
|
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||||
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
|
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
|
||||||
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
|
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
|
||||||
<li><a name="TOC23" href="#SEC23">REVISION</a>
|
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
|
||||||
|
<li><a name="TOC24" href="#SEC24">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
<br><a name="SEC1" href="#TOC1">BUILDING PCRE</a><br>
|
||||||
<P>
|
<P>
|
||||||
This document describes the optional features of PCRE that can be selected when
|
PCRE is distributed with a <b>configure</b> script that can be used to build the
|
||||||
the library is compiled. It assumes use of the <b>configure</b> script, where
|
library in Unix-like environments using the applications known as Autotools.
|
||||||
the optional features are selected or deselected by providing options to
|
Also in the distribution are files to support building using <b>CMake</b>
|
||||||
<b>configure</b> before running the <b>make</b> command. However, the same
|
instead of <b>configure</b>. The text file
|
||||||
options can be selected in both Unix-like and non-Unix-like environments using
|
<a href="README.txt"><b>README</b></a>
|
||||||
the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
|
contains general information about building with Autotools (some of which is
|
||||||
<b>configure</b> to build PCRE.
|
repeated below), and also has some comments about building on various operating
|
||||||
|
systems. There is a lot more information about building PCRE without using
|
||||||
|
Autotools (including information about using <b>CMake</b> and building "by
|
||||||
|
hand") in the text file called
|
||||||
|
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>.
|
||||||
|
You should consult this file as well as the
|
||||||
|
<a href="README.txt"><b>README</b></a>
|
||||||
|
file if you are building in a non-Unix-like environment.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC2" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
The rest of this document describes the optional features of PCRE that can be
|
||||||
|
selected when the library is compiled. It assumes use of the <b>configure</b>
|
||||||
|
script, where the optional features are selected or deselected by providing
|
||||||
|
options to <b>configure</b> before running the <b>make</b> command. However, the
|
||||||
|
same options can be selected in both Unix-like and non-Unix-like environments
|
||||||
|
using the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead
|
||||||
|
of <b>configure</b> to build PCRE.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a lot more information about building PCRE without using
|
If you are not using Autotools or <b>CMake</b>, option selection can be done by
|
||||||
<b>configure</b> (including information about using <b>CMake</b> or building "by
|
editing the <b>config.h</b> file, or by passing parameter settings to the
|
||||||
hand") in the file called <i>NON-AUTOTOOLS-BUILD</i>, which is part of the PCRE
|
compiler, as described in
|
||||||
distribution. You should consult this file as well as the <i>README</i> file if
|
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>.
|
||||||
you are building in a non-Unix-like environment.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The complete list of options for <b>configure</b> (which includes the standard
|
The complete list of options for <b>configure</b> (which includes the standard
|
||||||
@ -67,7 +84,7 @@ The following sections include descriptions of options whose names begin with
|
|||||||
--enable and --disable always come in pairs, so the complementary option always
|
--enable and --disable always come in pairs, so the complementary option always
|
||||||
exists as well, but as it specifies the default, it is not described.
|
exists as well, but as it specifies the default, it is not described.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, a library called <b>libpcre</b> is built, containing functions that
|
By default, a library called <b>libpcre</b> is built, containing functions that
|
||||||
take string arguments contained in vectors of bytes, either as single-byte
|
take string arguments contained in vectors of bytes, either as single-byte
|
||||||
@ -78,7 +95,7 @@ strings, by adding
|
|||||||
<pre>
|
<pre>
|
||||||
--enable-pcre16
|
--enable-pcre16
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. You can also build a separate
|
to the <b>configure</b> command. You can also build yet another separate
|
||||||
library, called <b>libpcre32</b>, in which strings are contained in vectors of
|
library, called <b>libpcre32</b>, in which strings are contained in vectors of
|
||||||
32-bit data units and interpreted either as single-unit characters or UTF-32
|
32-bit data units and interpreted either as single-unit characters or UTF-32
|
||||||
strings, by adding
|
strings, by adding
|
||||||
@ -94,17 +111,17 @@ and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
|
|||||||
an 8-bit program. None of these are built if you select only the 16-bit or
|
an 8-bit program. None of these are built if you select only the 16-bit or
|
||||||
32-bit libraries.
|
32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The PCRE building process uses <b>libtool</b> to build both shared and static
|
The Autotools PCRE building process uses <b>libtool</b> to build both shared and
|
||||||
Unix libraries by default. You can suppress one of these by adding one of
|
static libraries by default. You can suppress one of these by adding one of
|
||||||
<pre>
|
<pre>
|
||||||
--disable-shared
|
--disable-shared
|
||||||
--disable-static
|
--disable-static
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command, as required.
|
to the <b>configure</b> command, as required.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">C++ SUPPORT</a><br>
|
<br><a name="SEC5" href="#TOC1">C++ SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, if the 8-bit library is being built, the <b>configure</b> script
|
By default, if the 8-bit library is being built, the <b>configure</b> script
|
||||||
will search for a C++ compiler and C++ header files. If it finds them, it
|
will search for a C++ compiler and C++ header files. If it finds them, it
|
||||||
@ -115,7 +132,7 @@ strings). You can disable this by adding
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
|
<br><a name="SEC6" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
To build PCRE with support for UTF Unicode character strings, add
|
To build PCRE with support for UTF Unicode character strings, add
|
||||||
<pre>
|
<pre>
|
||||||
@ -143,7 +160,7 @@ not possible to support both EBCDIC and UTF-8 codes in the same version of the
|
|||||||
library. Consequently, --enable-utf and --enable-ebcdic are mutually
|
library. Consequently, --enable-utf and --enable-ebcdic are mutually
|
||||||
exclusive.
|
exclusive.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
<br><a name="SEC7" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
UTF support allows the libraries to process character codepoints up to 0x10ffff
|
UTF support allows the libraries to process character codepoints up to 0x10ffff
|
||||||
in the strings that they handle. On its own, however, it does not provide any
|
in the strings that they handle. On its own, however, it does not provide any
|
||||||
@ -163,7 +180,7 @@ supported. Details are given in the
|
|||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
<br><a name="SEC8" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
Just-in-time compiler support is included in the build by specifying
|
Just-in-time compiler support is included in the build by specifying
|
||||||
<pre>
|
<pre>
|
||||||
@ -180,7 +197,7 @@ pcregrep automatically makes use of it, unless you add
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
<br><a name="SEC9" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
||||||
of a line. This is the normal newline character on Unix-like systems. You can
|
of a line. This is the normal newline character on Unix-like systems. You can
|
||||||
@ -213,7 +230,7 @@ Whatever line ending convention is selected when PCRE is built can be
|
|||||||
overridden when the library functions are called. At build time it is
|
overridden when the library functions are called. At build time it is
|
||||||
conventional to use the standard for your operating system.
|
conventional to use the standard for your operating system.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
|
<br><a name="SEC10" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||||
whatever has been selected as the line ending sequence. If you specify
|
whatever has been selected as the line ending sequence. If you specify
|
||||||
@ -224,7 +241,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
|||||||
selected when PCRE is built can be overridden when the library functions are
|
selected when PCRE is built can be overridden when the library functions are
|
||||||
called.
|
called.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
<br><a name="SEC11" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
When the 8-bit library is called through the POSIX interface (see the
|
When the 8-bit library is called through the POSIX interface (see the
|
||||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
@ -240,7 +257,7 @@ such as
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
<br><a name="SEC12" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Within a compiled pattern, offset values are used to point from one part to
|
Within a compiled pattern, offset values are used to point from one part to
|
||||||
another (for example, from an opening parenthesis to an alternation
|
another (for example, from an opening parenthesis to an alternation
|
||||||
@ -259,7 +276,7 @@ longer offsets slows down the operation of PCRE because it has to load
|
|||||||
additional data when handling them. For the 32-bit library the value is always
|
additional data when handling them. For the 32-bit library the value is always
|
||||||
4 and cannot be overridden; the value of --with-link-size is ignored.
|
4 and cannot be overridden; the value of --with-link-size is ignored.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
<br><a name="SEC13" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
||||||
by making recursive calls to an internal function called <b>match()</b>. In
|
by making recursive calls to an internal function called <b>match()</b>. In
|
||||||
@ -290,7 +307,7 @@ perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
|
|||||||
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
||||||
function; it is not relevant for <b>pcre_dfa_exec()</b>.
|
function; it is not relevant for <b>pcre_dfa_exec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
<br><a name="SEC14" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
||||||
<P>
|
<P>
|
||||||
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
||||||
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
||||||
@ -319,7 +336,7 @@ constraints. However, you can set a lower limit by adding, for example,
|
|||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
<br><a name="SEC15" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE uses fixed tables for processing characters whose code values are less
|
PCRE uses fixed tables for processing characters whose code values are less
|
||||||
than 256. By default, PCRE is built with a set of tables that are distributed
|
than 256. By default, PCRE is built with a set of tables that are distributed
|
||||||
@ -336,7 +353,7 @@ compiling, because <b>dftables</b> is run on the local host. If you need to
|
|||||||
create alternative tables when cross compiling, you will have to do so "by
|
create alternative tables when cross compiling, you will have to do so "by
|
||||||
hand".)
|
hand".)
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">USING EBCDIC CODE</a><br>
|
<br><a name="SEC16" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE assumes by default that it will run in an environment where the character
|
PCRE assumes by default that it will run in an environment where the character
|
||||||
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
||||||
@ -367,7 +384,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
|
|||||||
and equivalent run-time options, refer to these character values in an EBCDIC
|
and equivalent run-time options, refer to these character values in an EBCDIC
|
||||||
environment.
|
environment.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
<br><a name="SEC17" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
||||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||||
@ -380,7 +397,7 @@ to the <b>configure</b> command. These options naturally require that the
|
|||||||
relevant libraries are installed on your system. Configuration will fail if
|
relevant libraries are installed on your system. Configuration will fail if
|
||||||
they are not.
|
they are not.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
|
<br><a name="SEC18" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
|
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
|
||||||
scanning, in order to be able to output "before" and "after" lines when it
|
scanning, in order to be able to output "before" and "after" lines when it
|
||||||
@ -395,7 +412,7 @@ parameter value by adding, for example,
|
|||||||
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
|
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
|
||||||
override this value by specifying a run-time option.
|
override this value by specifying a run-time option.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
<br><a name="SEC19" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
If you add
|
If you add
|
||||||
<pre>
|
<pre>
|
||||||
@ -426,7 +443,7 @@ automatically included, you may need to add something like
|
|||||||
</pre>
|
</pre>
|
||||||
immediately before the <b>configure</b> command.
|
immediately before the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
<br><a name="SEC20" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
By adding the
|
By adding the
|
||||||
<pre>
|
<pre>
|
||||||
@ -436,7 +453,7 @@ option to the <b>configure</b> command, PCRE will use valgrind annotations
|
|||||||
to mark certain memory regions as unaddressable. This allows it to detect
|
to mark certain memory regions as unaddressable. This allows it to detect
|
||||||
invalid memory accesses, and is mostly useful for debugging PCRE itself.
|
invalid memory accesses, and is mostly useful for debugging PCRE itself.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
<br><a name="SEC21" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
If your C compiler is gcc, you can build a version of PCRE that can generate a
|
If your C compiler is gcc, you can build a version of PCRE that can generate a
|
||||||
code coverage report for its test suite. To enable this, you must install
|
code coverage report for its test suite. To enable this, you must install
|
||||||
@ -493,11 +510,11 @@ This cleans all coverage data including the generated coverage report. For more
|
|||||||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
|
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -506,11 +523,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 30 October 2012
|
Last updated: 12 May 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -64,23 +64,63 @@ it is processed as if it were
|
|||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
Notice that there is a callout before and after each parenthesis and
|
Notice that there is a callout before and after each parenthesis and
|
||||||
alternation bar. Automatic callouts can be used for tracking the progress of
|
alternation bar. If the pattern contains a conditional group whose condition is
|
||||||
pattern matching. The
|
an assertion, an automatic callout is inserted immediately before the
|
||||||
<a href="pcretest.html"><b>pcretest</b></a>
|
condition. Such a callout may also be inserted explicitly, for example:
|
||||||
command has an option that sets automatic callouts; when it is used, the output
|
<pre>
|
||||||
indicates how the pattern is matched. This is useful information when you are
|
(?(?C9)(?=a)ab|de)
|
||||||
trying to optimize the performance of a particular pattern.
|
</pre>
|
||||||
|
This applies only to assertion conditions (because they are themselves
|
||||||
|
independent groups).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The use of callouts in a pattern makes it ineligible for optimization by the
|
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||||
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
|
The
|
||||||
option always fails.
|
<a href="pcretest.html"><b>pcretest</b></a>
|
||||||
|
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||||
|
used, the output indicates how the pattern is being matched. This is useful
|
||||||
|
information when you are trying to optimize the performance of a particular
|
||||||
|
pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
You should be aware that, because of optimizations in the way PCRE matches
|
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||||
patterns by default, callouts sometimes do not happen. For example, if the
|
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||||
pattern is
|
</P>
|
||||||
|
<P>
|
||||||
|
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||||
|
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||||
|
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
|
||||||
|
then applied with automatic callouts to the string "aaaa" is:
|
||||||
|
<pre>
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
No match
|
||||||
|
</pre>
|
||||||
|
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||||
|
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||||
|
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||||
|
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||||
|
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
|
||||||
|
this:
|
||||||
|
<pre>
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^^ [bc]
|
||||||
|
No match
|
||||||
|
</pre>
|
||||||
|
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||||
|
again, repeatedly, until a+ itself fails.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Other optimizations that provide fast "no match" results also affect callouts.
|
||||||
|
For example, if the pattern is
|
||||||
<pre>
|
<pre>
|
||||||
ab(?C4)cd
|
ab(?C4)cd
|
||||||
</pre>
|
</pre>
|
||||||
@ -104,11 +144,11 @@ callouts such as the example above are obeyed.
|
|||||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE reaches a callout point, the external function
|
During matching, when PCRE reaches a callout point, the external function
|
||||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
|
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
|
||||||
(if it is set). This applies to both normal and DFA matching. The only
|
set). This applies to both normal and DFA matching. The only argument to the
|
||||||
argument to the callout function is a pointer to a <b>pcre_callout</b>
|
callout function is a pointer to a <b>pcre_callout</b> or
|
||||||
or <b>pcre[16|32]_callout</b> block.
|
<b>pcre[16|32]_callout</b> block. These structures contain the following
|
||||||
These structures contain the following fields:
|
fields:
|
||||||
<pre>
|
<pre>
|
||||||
int <i>version</i>;
|
int <i>version</i>;
|
||||||
int <i>callout_number</i>;
|
int <i>callout_number</i>;
|
||||||
@ -141,10 +181,10 @@ automatically generated callouts).
|
|||||||
<P>
|
<P>
|
||||||
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
||||||
passed by the caller to the matching function. When <b>pcre_exec()</b> or
|
passed by the caller to the matching function. When <b>pcre_exec()</b> or
|
||||||
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to extract
|
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to
|
||||||
substrings that have been matched so far, in the same way as for extracting
|
extract substrings that have been matched so far, in the same way as for
|
||||||
substrings after a match has completed. For the DFA matching functions, this
|
extracting substrings after a match has completed. For the DFA matching
|
||||||
field is not useful.
|
functions, this field is not useful.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
||||||
@ -171,8 +211,10 @@ functions are used, because they do not support captured substrings.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>capture_last</i> field contains the number of the most recently captured
|
The <i>capture_last</i> field contains the number of the most recently captured
|
||||||
substring. If no substrings have been captured, its value is -1. This is always
|
substring. However, when a recursion exits, the value reverts to what it was
|
||||||
the case for the DFA matching functions.
|
outside the recursion, as do the values of all captured substrings. If no
|
||||||
|
substrings have been captured, the value of <i>capture_last</i> is -1. This is
|
||||||
|
always the case for the DFA matching functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>callout_data</i> field contains a value that is passed to a matching
|
The <i>callout_data</i> field contains a value that is passed to a matching
|
||||||
@ -203,11 +245,12 @@ same callout number. However, they are set for all callouts.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>mark</i> field is present from version 2 of the callout structure. In
|
The <i>mark</i> field is present from version 2 of the callout structure. In
|
||||||
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a pointer to
|
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a
|
||||||
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
pointer to the zero-terminated name of the most recently passed (*MARK),
|
||||||
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
|
||||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
||||||
callouts from the DFA matching functions this field always contains NULL.
|
previous (*MARK). In callouts from the DFA matching functions this field always
|
||||||
|
contains NULL.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
|
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -234,9 +277,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 June 2012
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -36,10 +36,8 @@ these do not seem to have any use.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||||
counted, but their entries in the offsets vector are never set. Perl sets its
|
counted, but their entries in the offsets vector are never set. Perl sometimes
|
||||||
numerical variables from any such patterns that are matched before the
|
(but not always) sets its numerical variables from inside negative assertions.
|
||||||
assertion fails to match something (thereby succeeding), but only if the
|
|
||||||
negative lookahead assertion contains just one branch.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
4. Though binary zero characters are supported in the subject string, they are
|
4. Though binary zero characters are supported in the subject string, they are
|
||||||
@ -102,24 +100,32 @@ in the
|
|||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
10. If any of the backtracking control verbs are used in an assertion or in a
|
10. If any of the backtracking control verbs are used in a subpattern that is
|
||||||
subpattern that is called as a subroutine (whether or not recursively), their
|
called as a subroutine (whether or not recursively), their effect is confined
|
||||||
effect is confined to that subpattern; it does not extend to the surrounding
|
to that subpattern; it does not extend to the surrounding pattern. This is not
|
||||||
pattern. This is not always the case in Perl. In particular, if (*THEN) is
|
always the case in Perl. In particular, if (*THEN) is present in a group that
|
||||||
present in a group that is called as a subroutine, its action is limited to
|
is called as a subroutine, its action is limited to that group, even if the
|
||||||
that group, even if the group does not contain any | characters. There is one
|
group does not contain any | characters. Note that such subpatterns are
|
||||||
exception to this: the name from a (*MARK), (*PRUNE), or (*THEN) that is
|
processed as anchored at the point where they are tested.
|
||||||
encountered in a successful positive assertion <i>is</i> passed back when a
|
|
||||||
match succeeds (compare capturing parentheses in assertions). Note that such
|
|
||||||
subpatterns are processed as anchored at the point where they are tested.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
11. There are some differences that are concerned with the settings of captured
|
11. If a pattern contains more than one backtracking control verb, the first
|
||||||
|
one that is backtracked onto acts. For example, in the pattern
|
||||||
|
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||||
|
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||||
|
same as PCRE, but there are examples where it differs.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
12. Most backtracking verbs in assertions have their normal actions. They are
|
||||||
|
not confined to the assertion.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
13. There are some differences that are concerned with the settings of captured
|
||||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
||||||
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
||||||
works internally just with numbers, using an external table to translate
|
works internally just with numbers, using an external table to translate
|
||||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||||
@ -130,13 +136,26 @@ names map to capturing subpattern number 1. To avoid this confusing situation,
|
|||||||
an error is given at compile time.
|
an error is given at compile time.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
13. Perl recognizes comments in some places that PCRE does not, for example,
|
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||||
PCRE_EXTENDED option is set.
|
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
14. PCRE provides some extensions to the Perl regular expression facilities.
|
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||||
|
warning features, so it gives an error in these cases because they are almost
|
||||||
|
certainly user mistakes.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||||
|
affected when case-independent matching is specified. For example, \p{Lu}
|
||||||
|
always matches an upper case letter. I think Perl has changed in this respect;
|
||||||
|
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
||||||
|
letters, regardless of case, when case independence is specified.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||||
is with respect to Perl 5.10:
|
is with respect to Perl 5.10:
|
||||||
@ -207,9 +226,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 25 August 2012
|
Last updated: 10 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -37,8 +37,10 @@ man page, in case the conversion went wrong.
|
|||||||
<b>pcregrep</b> searches files for character patterns, in the same way as other
|
<b>pcregrep</b> searches files for character patterns, in the same way as other
|
||||||
grep commands do, but it uses the PCRE regular expression library to support
|
grep commands do, but it uses the PCRE regular expression library to support
|
||||||
patterns that are compatible with the regular expressions of Perl 5. See
|
patterns that are compatible with the regular expressions of Perl 5. See
|
||||||
|
<a href="pcresyntax.html"><b>pcresyntax</b>(3)</a>
|
||||||
|
for a quick-reference summary of pattern syntax, or
|
||||||
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
|
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
|
||||||
for a full description of syntax and semantics of the regular expressions
|
for a full description of the syntax and semantics of the regular expressions
|
||||||
that PCRE supports.
|
that PCRE supports.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -748,9 +750,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 13 September 2012
|
Last updated: 03 April 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -172,15 +172,9 @@ PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
|
|||||||
PCRE_PARTIAL_SOFT.
|
PCRE_PARTIAL_SOFT.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The unsupported pattern items are:
|
The only unsupported pattern items are \C (match a single data unit) when
|
||||||
<pre>
|
running in a UTF mode, and a callout immediately before an assertion condition
|
||||||
\C match a single byte; not supported in UTF-8 mode
|
in a conditional group.
|
||||||
(?Cn) callouts
|
|
||||||
(*PRUNE) )
|
|
||||||
(*SKIP) ) backtracking control verbs
|
|
||||||
(*THEN) )
|
|
||||||
</pre>
|
|
||||||
Support for some of these may be added in future.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
|
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -449,9 +443,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 October 2012
|
Last updated: 17 March 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -21,9 +21,10 @@ practice be relevant.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||||
|
library. If you want to process regular expressions that are truly enormous,
|
||||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||||
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
||||||
the source distribution and the
|
the source distribution and the
|
||||||
@ -36,7 +37,10 @@ All values in repeating quantifiers must be less than 65536.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns.
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
|
order to limit the amount of system stack used at compile time. The limit can
|
||||||
|
be specified when PCRE is built; the default is 250.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a limit to the number of forward references to subsequent subpatterns
|
There is a limit to the number of forward references to subsequent subpatterns
|
||||||
@ -50,7 +54,7 @@ maximum number of named subpatterns is 10000.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a subject string is the largest positive number that an
|
The maximum length of a subject string is the largest positive number that an
|
||||||
@ -77,9 +81,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 May 2012
|
Last updated: 05 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -126,6 +126,15 @@ character of the subject. The algorithm does not automatically move on to find
|
|||||||
matches that start at later positions.
|
matches that start at later positions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
There are a number of features of PCRE regular expressions that are not
|
There are a number of features of PCRE regular expressions that are not
|
||||||
supported by the alternative matching algorithm. They are as follows:
|
supported by the alternative matching algorithm. They are as follows:
|
||||||
</P>
|
</P>
|
||||||
@ -224,7 +233,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 January 2012
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -81,33 +81,36 @@ strings. This optimization is also disabled for partial matching.
|
|||||||
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
|
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
|
||||||
<P>
|
<P>
|
||||||
A partial match occurs during a call to <b>pcre_exec()</b> or
|
A partial match occurs during a call to <b>pcre_exec()</b> or
|
||||||
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached successfully,
|
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached
|
||||||
but matching cannot continue because more characters are needed. However, at
|
successfully, but matching cannot continue because more characters are needed.
|
||||||
least one character in the subject must have been inspected. This character
|
However, at least one character in the subject must have been inspected. This
|
||||||
need not form part of the final matched string; lookbehind assertions and the
|
character need not form part of the final matched string; lookbehind assertions
|
||||||
\K escape sequence provide ways of inspecting characters before the start of a
|
and the \K escape sequence provide ways of inspecting characters before the
|
||||||
matched substring. The requirement for inspecting at least one character exists
|
start of a matched substring. The requirement for inspecting at least one
|
||||||
because an empty string can always be matched; without such a restriction there
|
character exists because an empty string can always be matched; without such a
|
||||||
would always be a partial match of an empty string at the end of the subject.
|
restriction there would always be a partial match of an empty string at the end
|
||||||
|
of the subject.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If there are at least two slots in the offsets vector when a partial match is
|
If there are at least two slots in the offsets vector when a partial match is
|
||||||
returned, the first slot is set to the offset of the earliest character that
|
returned, the first slot is set to the offset of the earliest character that
|
||||||
was inspected. For convenience, the second offset points to the end of the
|
was inspected. For convenience, the second offset points to the end of the
|
||||||
subject so that a substring can easily be identified.
|
subject so that a substring can easily be identified. If there are at least
|
||||||
|
three slots in the offsets vector, the third slot is set to the offset of the
|
||||||
|
character where matching started.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For the majority of patterns, the first offset identifies the start of the
|
For the majority of patterns, the contents of the first and third slots will be
|
||||||
partially matched string. However, for patterns that contain lookbehind
|
the same. However, for patterns that contain lookbehind assertions, or begin
|
||||||
assertions, or \K, or begin with \b or \B, earlier characters have been
|
with \b or \B, characters before the one where matching started may have been
|
||||||
inspected while carrying out the match. For example:
|
inspected while carrying out the match. For example, consider this pattern:
|
||||||
<pre>
|
<pre>
|
||||||
/(?<=abc)123/
|
/(?<=abc)123/
|
||||||
</pre>
|
</pre>
|
||||||
This pattern matches "123", but only if it is preceded by "abc". If the subject
|
This pattern matches "123", but only if it is preceded by "abc". If the subject
|
||||||
string is "xyzabc12", the offsets after a partial match are for the substring
|
string is "xyzabc12", the first two offsets after a partial match are for the
|
||||||
"abc12", because all these characters are needed if another match is tried
|
substring "abc12", because all these characters were inspected. However, the
|
||||||
with extra characters added to the subject.
|
third offset is set to 6, because that is the offset where matching began.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
What happens when a partial match is identified depends on which of the two
|
What happens when a partial match is identified depends on which of the two
|
||||||
@ -303,6 +306,16 @@ not retain the previously partially-matched string. It is up to the calling
|
|||||||
program to do that if it needs to.
|
program to do that if it needs to.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||||
|
not possible to try again at a new starting point. All this facility is capable
|
||||||
|
of doing is continuing with the previous match attempt. In the previous
|
||||||
|
example, if the second set of data is "ug23" the result is no match, even
|
||||||
|
though there would be a match for "aug23" if the entire string were given at
|
||||||
|
once. Depending on the application, this may or may not be what you want.
|
||||||
|
The only way to allow for starting again at the next character is to retain the
|
||||||
|
matched part of the subject and try a new complete match.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||||
facility can be used to pass very long subject strings to the DFA matching
|
facility can be used to pass very long subject strings to the DFA matching
|
||||||
@ -334,10 +347,9 @@ processing time is needed.
|
|||||||
<P>
|
<P>
|
||||||
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
|
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
|
||||||
with \b or \B, the string that is returned for a partial match includes
|
with \b or \B, the string that is returned for a partial match includes
|
||||||
characters that precede the partially matched string itself, because these must
|
characters that precede the start of what would be returned for a complete
|
||||||
be retained when adding on more characters for a subsequent matching attempt.
|
match, because it contains all the characters that were inspected during the
|
||||||
However, in some cases you may need to retain even earlier characters, as
|
partial match.
|
||||||
discussed in the next section.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
|
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -356,12 +368,35 @@ includes the effect of PCRE_NOTEOL.
|
|||||||
offsets that are returned for a partial match. However a lookbehind assertion
|
offsets that are returned for a partial match. However a lookbehind assertion
|
||||||
later in the pattern could require even earlier characters to be inspected. You
|
later in the pattern could require even earlier characters to be inspected. You
|
||||||
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
|
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
|
||||||
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the length
|
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the
|
||||||
of the largest lookbehind in the pattern. This length is given in characters,
|
length of the longest lookbehind in the pattern. This length is given in
|
||||||
not bytes. If you always retain at least that many characters before the
|
characters, not bytes. If you always retain at least that many characters
|
||||||
partially matched string, all should be well. (Of course, near the start of the
|
before the partially matched string, all should be well. (Of course, near the
|
||||||
subject, fewer characters may be present; in that case all characters should be
|
start of the subject, fewer characters may be present; in that case all
|
||||||
retained.)
|
characters should be retained.)
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
From release 8.33, there is a more accurate way of deciding which characters to
|
||||||
|
retain. Instead of subtracting the length of the longest lookbehind from the
|
||||||
|
earliest inspected character (<i>offsets[0]</i>), the match start position
|
||||||
|
(<i>offsets[2]</i>) should be used, and the next match attempt started at the
|
||||||
|
<i>offsets[2]</i> character by setting the <i>startoffset</i> argument of
|
||||||
|
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For example, if the pattern "(?<=123)abc" is partially
|
||||||
|
matched against the string "xx123a", the three offset values returned are 2, 6,
|
||||||
|
and 5. This indicates that the matching process that gave a partial match
|
||||||
|
started at offset 5, but the characters "123a" were all inspected. The maximum
|
||||||
|
lookbehind for that pattern is 3, so taking that away from 5 shows that we need
|
||||||
|
only keep "123a", and the next match attempt can be started at offset 3 (that
|
||||||
|
is, at "a") when further characters have been added. When the match start is
|
||||||
|
not the earliest inspected character, <b>pcretest</b> shows it explicitly:
|
||||||
|
<pre>
|
||||||
|
re> "(?<=123)abc"
|
||||||
|
data> xx123a\P\P
|
||||||
|
Partial match at offset 5: 123a
|
||||||
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
3. Because a partial match must always contain at least one character, what
|
3. Because a partial match must always contain at least one character, what
|
||||||
@ -465,9 +500,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 June 2012
|
Last updated: 02 July 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,7 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||||||
man page, in case the conversion went wrong.
|
man page, in case the conversion went wrong.
|
||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
|
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||||
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
||||||
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
||||||
@ -23,23 +23,21 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
||||||
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>#include <pcreposix.h></b>
|
<b>#include <pcreposix.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
||||||
<b> int <i>cflags</i>);</b>
|
<b> int <i>cflags</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
||||||
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||||
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void regfree(regex_t *<i>preg</i>);</b>
|
<b>void regfree(regex_t *<i>preg</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
|
@ -102,8 +102,8 @@ study data.
|
|||||||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
|
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary, you
|
||||||
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||||
the usual way.
|
the usual way.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -119,6 +119,11 @@ in the
|
|||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>Warning:</b> The tables that <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> use
|
||||||
|
must be the same as those that were used when the pattern was compiled. If this
|
||||||
|
is not the case, the behaviour is undefined.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
If you did not provide custom character tables when the pattern was compiled,
|
If you did not provide custom character tables when the pattern was compiled,
|
||||||
the pointer in the compiled pattern is NULL, which causes the matching
|
the pointer in the compiled pattern is NULL, which causes the matching
|
||||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||||
@ -126,9 +131,9 @@ special action at run time in this case.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you saved study data with the compiled pattern, you need to create your own
|
If you saved study data with the compiled pattern, you need to create your own
|
||||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
|
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point
|
||||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
the <i>flags</i> field to indicate that study data is present. Then pass the
|
||||||
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
||||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||||
and so is lost by a save/restore cycle.
|
and so is lost by a save/restore cycle.
|
||||||
@ -149,9 +154,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 June 2012
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -29,13 +29,13 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
|
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
|
||||||
<li><a name="TOC15" href="#SEC15">COMMENT</a>
|
<li><a name="TOC15" href="#SEC15">COMMENT</a>
|
||||||
<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
|
<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
|
||||||
<li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
|
||||||
<li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
|
<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
|
||||||
<li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||||
<li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
|
<li><a name="TOC20" href="#SEC20">BACKREFERENCES</a>
|
||||||
<li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
|
<li><a name="TOC21" href="#SEC21">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||||
<li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
|
<li><a name="TOC22" href="#SEC22">CONDITIONAL PATTERNS</a>
|
||||||
<li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
|
<li><a name="TOC23" href="#SEC23">BACKTRACKING CONTROL</a>
|
||||||
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
|
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
|
||||||
<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
|
<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
|
||||||
<li><a name="TOC26" href="#SEC26">AUTHOR</a>
|
<li><a name="TOC26" href="#SEC26">AUTHOR</a>
|
||||||
@ -65,10 +65,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\n newline (hex 0A)
|
\n newline (hex 0A)
|
||||||
\r carriage return (hex 0D)
|
\r carriage return (hex 0D)
|
||||||
\t tab (hex 09)
|
\t tab (hex 09)
|
||||||
|
\0dd character with octal code 0dd
|
||||||
\ddd character with octal code ddd, or backreference
|
\ddd character with octal code ddd, or backreference
|
||||||
|
\o{ddd..} character with octal code ddd..
|
||||||
\xhh character with hex code hh
|
\xhh character with hex code hh
|
||||||
\x{hhh..} character with hex code hhh..
|
\x{hhh..} character with hex code hhh..
|
||||||
</PRE>
|
</pre>
|
||||||
|
Note that \0dd is always an octal code, and that \8 and \9 are the literal
|
||||||
|
characters "8" and "9".
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -92,9 +96,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\W a "non-word" character
|
\W a "non-word" character
|
||||||
\X a Unicode extended grapheme cluster
|
\X a Unicode extended grapheme cluster
|
||||||
</pre>
|
</pre>
|
||||||
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
|
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
||||||
characters, even in a UTF mode. However, this can be changed by setting the
|
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||||
PCRE_UCP option.
|
happening, \s and \w may also match characters with code points in the range
|
||||||
|
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||||
|
is changed to use Unicode properties and they match many more characters.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -150,9 +156,13 @@ PCRE_UCP option.
|
|||||||
<pre>
|
<pre>
|
||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
|
Xuc Universally-named character: one that can be
|
||||||
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
</PRE>
|
</pre>
|
||||||
|
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||||
|
at release 5.18 and PCRE changed at release 8.34.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -329,7 +339,8 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
|
|||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\K reset start of match
|
\K reset start of match
|
||||||
</PRE>
|
</pre>
|
||||||
|
\K is honoured in positive assertions, but ignored in negative ones.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
|
<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -372,18 +383,45 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
|
|||||||
(?x) extended (ignore white space)
|
(?x) extended (ignore white space)
|
||||||
(?-...) unset option(s)
|
(?-...) unset option(s)
|
||||||
</pre>
|
</pre>
|
||||||
The following are recognized only at the start of a pattern or after one of the
|
The following are recognized only at the very start of a pattern or after one
|
||||||
newline-setting options with similar syntax:
|
of the newline or \R options with similar syntax. More than one of them may
|
||||||
|
appear.
|
||||||
<pre>
|
<pre>
|
||||||
|
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||||
|
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||||
|
(*NO_AUTO_POSSESS) no auto-possessification (PCRE_NO_AUTO_POSSESS)
|
||||||
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
|
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
|
||||||
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
|
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
|
||||||
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
|
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
|
||||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||||
(*UTF) set appropriate UTF mode for the library in use
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
||||||
|
</pre>
|
||||||
|
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||||
|
limits set by the caller of pcre_exec(), not increase them.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||||
|
<P>
|
||||||
|
These are recognized only at the very start of the pattern or after option
|
||||||
|
settings with a similar syntax.
|
||||||
|
<pre>
|
||||||
|
(*CR) carriage return only
|
||||||
|
(*LF) linefeed only
|
||||||
|
(*CRLF) carriage return followed by linefeed
|
||||||
|
(*ANYCRLF) all three of the above
|
||||||
|
(*ANY) any Unicode newline sequence
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||||
|
<P>
|
||||||
|
These are recognized only at the very start of the pattern or after option
|
||||||
|
settings with a similar syntax.
|
||||||
|
<pre>
|
||||||
|
(*BSR_ANYCRLF) CR, LF, or CRLF
|
||||||
|
(*BSR_UNICODE) any Unicode newline sequence
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?=...) positive look ahead
|
(?=...) positive look ahead
|
||||||
@ -393,7 +431,7 @@ newline-setting options with similar syntax:
|
|||||||
</pre>
|
</pre>
|
||||||
Each top-level branch of a look behind must be of a fixed length.
|
Each top-level branch of a look behind must be of a fixed length.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
|
<br><a name="SEC20" href="#TOC1">BACKREFERENCES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
\n reference by number (can be ambiguous)
|
\n reference by number (can be ambiguous)
|
||||||
@ -407,7 +445,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
(?P=name) reference by name (Python)
|
(?P=name) reference by name (Python)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
<br><a name="SEC21" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?R) recurse whole pattern
|
(?R) recurse whole pattern
|
||||||
@ -426,7 +464,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
\g'-n' call subpattern by relative number (PCRE extension)
|
\g'-n' call subpattern by relative number (PCRE extension)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
<br><a name="SEC22" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?(condition)yes-pattern)
|
(?(condition)yes-pattern)
|
||||||
@ -445,7 +483,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||||||
(?(assert)... assertion condition
|
(?(assert)... assertion condition
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||||
<P>
|
<P>
|
||||||
The following act immediately they are reached:
|
The following act immediately they are reached:
|
||||||
<pre>
|
<pre>
|
||||||
@ -468,27 +506,6 @@ pattern is not anchored.
|
|||||||
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
|
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
|
|
||||||
<P>
|
|
||||||
These are recognized only at the very start of the pattern or after a
|
|
||||||
(*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
|
|
||||||
<pre>
|
|
||||||
(*CR) carriage return only
|
|
||||||
(*LF) linefeed only
|
|
||||||
(*CRLF) carriage return followed by linefeed
|
|
||||||
(*ANYCRLF) all three of the above
|
|
||||||
(*ANY) any Unicode newline sequence
|
|
||||||
</PRE>
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
|
|
||||||
<P>
|
|
||||||
These are recognized only at the very start of the pattern or after a
|
|
||||||
(*...) option that sets the newline convention or a UTF or UCP mode.
|
|
||||||
<pre>
|
|
||||||
(*BSR_ANYCRLF) CR, LF, or CRLF
|
|
||||||
(*BSR_UNICODE) any Unicode newline sequence
|
|
||||||
</PRE>
|
|
||||||
</P>
|
|
||||||
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
@ -512,9 +529,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 November 2012
|
Last updated: 08 January 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -14,21 +14,22 @@ man page, in case the conversion went wrong.
|
|||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
<li><a name="TOC2" href="#SEC2">INPUT DATA FORMAT</a>
|
||||||
<li><a name="TOC3" href="#SEC3">COMMAND LINE OPTIONS</a>
|
<li><a name="TOC3" href="#SEC3">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC4" href="#SEC4">DESCRIPTION</a>
|
<li><a name="TOC4" href="#SEC4">COMMAND LINE OPTIONS</a>
|
||||||
<li><a name="TOC5" href="#SEC5">PATTERN MODIFIERS</a>
|
<li><a name="TOC5" href="#SEC5">DESCRIPTION</a>
|
||||||
<li><a name="TOC6" href="#SEC6">DATA LINES</a>
|
<li><a name="TOC6" href="#SEC6">PATTERN MODIFIERS</a>
|
||||||
<li><a name="TOC7" href="#SEC7">THE ALTERNATIVE MATCHING FUNCTION</a>
|
<li><a name="TOC7" href="#SEC7">DATA LINES</a>
|
||||||
<li><a name="TOC8" href="#SEC8">DEFAULT OUTPUT FROM PCRETEST</a>
|
<li><a name="TOC8" href="#SEC8">THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||||
<li><a name="TOC9" href="#SEC9">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
|
<li><a name="TOC9" href="#SEC9">DEFAULT OUTPUT FROM PCRETEST</a>
|
||||||
<li><a name="TOC10" href="#SEC10">RESTARTING AFTER A PARTIAL MATCH</a>
|
<li><a name="TOC10" href="#SEC10">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||||
<li><a name="TOC11" href="#SEC11">CALLOUTS</a>
|
<li><a name="TOC11" href="#SEC11">RESTARTING AFTER A PARTIAL MATCH</a>
|
||||||
<li><a name="TOC12" href="#SEC12">NON-PRINTING CHARACTERS</a>
|
<li><a name="TOC12" href="#SEC12">CALLOUTS</a>
|
||||||
<li><a name="TOC13" href="#SEC13">SAVING AND RELOADING COMPILED PATTERNS</a>
|
<li><a name="TOC13" href="#SEC13">NON-PRINTING CHARACTERS</a>
|
||||||
<li><a name="TOC14" href="#SEC14">SEE ALSO</a>
|
<li><a name="TOC14" href="#SEC14">SAVING AND RELOADING COMPILED PATTERNS</a>
|
||||||
<li><a name="TOC15" href="#SEC15">AUTHOR</a>
|
<li><a name="TOC15" href="#SEC15">SEE ALSO</a>
|
||||||
<li><a name="TOC16" href="#SEC16">REVISION</a>
|
<li><a name="TOC16" href="#SEC16">AUTHOR</a>
|
||||||
|
<li><a name="TOC17" href="#SEC17">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -63,25 +64,34 @@ conjunction with the test script and data files that are distributed as part of
|
|||||||
PCRE, and are unlikely to be of use otherwise. They are all documented here,
|
PCRE, and are unlikely to be of use otherwise. They are all documented here,
|
||||||
but without much justification.
|
but without much justification.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC2" href="#TOC1">INPUT DATA FORMAT</a><br>
|
||||||
|
<P>
|
||||||
|
Input to <b>pcretest</b> is processed line by line, either by calling the C
|
||||||
|
library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
|
||||||
|
below). In Unix-like environments, <b>fgets()</b> treats any bytes other than
|
||||||
|
newline as data characters. However, in some Windows environments character 26
|
||||||
|
(hex 1A) causes an immediate end of file, and no further data is read. For
|
||||||
|
maximum portability, therefore, it is safest to use only ASCII characters in
|
||||||
|
<b>pcretest</b> input files.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC3" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
From release 8.30, two separate PCRE libraries can be built. The original one
|
From release 8.30, two separate PCRE libraries can be built. The original one
|
||||||
supports 8-bit character strings, whereas the newer 16-bit library supports
|
supports 8-bit character strings, whereas the newer 16-bit library supports
|
||||||
character strings encoded in 16-bit units. From release 8.32, a third
|
character strings encoded in 16-bit units. From release 8.32, a third library
|
||||||
library can be built, supporting character strings encoded in 32-bit units.
|
can be built, supporting character strings encoded in 32-bit units. The
|
||||||
The <b>pcretest</b> program can be
|
<b>pcretest</b> program can be used to test all three libraries. However, it is
|
||||||
used to test all three libraries. However, it is itself still an 8-bit program,
|
itself still an 8-bit program, reading 8-bit input and writing 8-bit output.
|
||||||
reading 8-bit input and writing 8-bit output. When testing the 16-bit or 32-bit
|
When testing the 16-bit or 32-bit library, the patterns and data strings are
|
||||||
library, the patterns and data strings are converted to 16- or 32-bit format
|
converted to 16- or 32-bit format before being passed to the PCRE library
|
||||||
before being passed to the PCRE library functions. Results are converted to
|
functions. Results are converted to 8-bit for output.
|
||||||
8-bit for output.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
References to functions and structures of the form <b>pcre[16|32]_xx</b> below
|
References to functions and structures of the form <b>pcre[16|32]_xx</b> below
|
||||||
mean "<b>pcre_xx</b> when using the 8-bit library or <b>pcre16_xx</b> when using
|
mean "<b>pcre_xx</b> when using the 8-bit library, <b>pcre16_xx</b> when using
|
||||||
the 16-bit library".
|
the 16-bit library, or <b>pcre32_xx</b> when using the 32-bit library".
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>-8</b>
|
<b>-8</b>
|
||||||
If the 8-bit library has been built, this option causes the 8-bit library
|
If the 8-bit library has been built, this option causes the 8-bit library
|
||||||
@ -110,23 +120,30 @@ internal form is output after compilation.
|
|||||||
<P>
|
<P>
|
||||||
<b>-C</b>
|
<b>-C</b>
|
||||||
Output the version number of the PCRE library, and all available information
|
Output the version number of the PCRE library, and all available information
|
||||||
about the optional features that are included, and then exit. All other options
|
about the optional features that are included, and then exit with zero exit
|
||||||
are ignored.
|
code. All other options are ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-C</b> <i>option</i>
|
<b>-C</b> <i>option</i>
|
||||||
Output information about a specific build-time option, then exit. This
|
Output information about a specific build-time option, then exit. This
|
||||||
functionality is intended for use in scripts such as <b>RunTest</b>. The
|
functionality is intended for use in scripts such as <b>RunTest</b>. The
|
||||||
following options output the value indicated:
|
following options output the value and set the exit code as indicated:
|
||||||
<pre>
|
<pre>
|
||||||
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
|
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
|
||||||
0x15 or 0x25
|
0x15 or 0x25
|
||||||
0 if used in an ASCII environment
|
0 if used in an ASCII environment
|
||||||
linksize the internal link size (2, 3, or 4)
|
exit code is always 0
|
||||||
|
linksize the configured internal link size (2, 3, or 4)
|
||||||
|
exit code is set to the link size
|
||||||
newline the default newline setting:
|
newline the default newline setting:
|
||||||
CR, LF, CRLF, ANYCRLF, or ANY
|
CR, LF, CRLF, ANYCRLF, or ANY
|
||||||
|
exit code is always 0
|
||||||
|
bsr the default setting for what \R matches:
|
||||||
|
ANYCRLF or ANY
|
||||||
|
exit code is always 0
|
||||||
</pre>
|
</pre>
|
||||||
The following options output 1 for true or zero for false:
|
The following options output 1 for true or 0 for false, and set the exit code
|
||||||
|
to the same value:
|
||||||
<pre>
|
<pre>
|
||||||
ebcdic compiled for an EBCDIC environment
|
ebcdic compiled for an EBCDIC environment
|
||||||
jit just-in-time support is available
|
jit just-in-time support is available
|
||||||
@ -134,8 +151,10 @@ The following options output 1 for true or zero for false:
|
|||||||
pcre32 the 32-bit library was built
|
pcre32 the 32-bit library was built
|
||||||
pcre8 the 8-bit library was built
|
pcre8 the 8-bit library was built
|
||||||
ucp Unicode property support is available
|
ucp Unicode property support is available
|
||||||
utf UTF-8 and/or UTF-16 and/or UTF-32 support is available
|
utf UTF-8 and/or UTF-16 and/or UTF-32 support
|
||||||
</PRE>
|
is available
|
||||||
|
</pre>
|
||||||
|
If an unknown option is given, an error message is output; the exit code is 0.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-d</b>
|
<b>-d</b>
|
||||||
@ -171,6 +190,11 @@ equivalent to adding <b>/M</b> to each regular expression. The size is given in
|
|||||||
bytes for both libraries.
|
bytes for both libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-O</b>
|
||||||
|
Behave as if each pattern has the <b>/O</b> modifier, that is disable
|
||||||
|
auto-possessification for all patterns.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-o</b> <i>osize</i>
|
<b>-o</b> <i>osize</i>
|
||||||
Set the number of elements in the output vector that is used when calling
|
Set the number of elements in the output vector that is used when calling
|
||||||
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
||||||
@ -240,20 +264,25 @@ should never be studied (see the <b>/S</b> pattern modifier below).
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-t</b>
|
<b>-t</b>
|
||||||
Run each compile, study, and match many times with a timer, and output
|
Run each compile, study, and match many times with a timer, and output the
|
||||||
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
|
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||||
<b>-t</b>, because you will then get the size output a zillion times, and the
|
<b>-m</b> with <b>-t</b>, because you will then get the size output a zillion
|
||||||
timing will be distorted. You can control the number of iterations that are
|
times, and the timing will be distorted. You can control the number of
|
||||||
used for timing by following <b>-t</b> with a number (as a separate item on the
|
iterations that are used for timing by following <b>-t</b> with a number (as a
|
||||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||||
to iterate 500000 times.
|
The default is to iterate 500000 times.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-tm</b>
|
<b>-tm</b>
|
||||||
This is like <b>-t</b> except that it times only the matching phase, not the
|
This is like <b>-t</b> except that it times only the matching phase, not the
|
||||||
compile or study phases.
|
compile or study phases.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">DESCRIPTION</a><br>
|
<P>
|
||||||
|
<b>-T</b> <b>-TM</b>
|
||||||
|
These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
|
||||||
|
the total times for all compiles, studies, and matches are output.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
||||||
writes to the second. If it is given only one filename argument, it reads from
|
writes to the second. If it is given only one filename argument, it reads from
|
||||||
@ -271,7 +300,7 @@ option states whether or not <b>readline()</b> will be used.
|
|||||||
<P>
|
<P>
|
||||||
The program handles any number of sets of input on a single input file. Each
|
The program handles any number of sets of input on a single input file. Each
|
||||||
set starts with a regular expression, and continues with any number of data
|
set starts with a regular expression, and continues with any number of data
|
||||||
lines to be matched against the pattern.
|
lines to be matched against that pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Each data line is matched separately and independently. If you want to do
|
Each data line is matched separately and independently. If you want to do
|
||||||
@ -310,7 +339,7 @@ backslash, because
|
|||||||
is interpreted as the first line of a pattern that starts with "abc/", causing
|
is interpreted as the first line of a pattern that starts with "abc/", causing
|
||||||
pcretest to read the next line as a continuation of the regular expression.
|
pcretest to read the next line as a continuation of the regular expression.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PATTERN MODIFIERS</a><br>
|
<br><a name="SEC6" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
A pattern may be followed by any number of modifiers, which are mostly single
|
A pattern may be followed by any number of modifiers, which are mostly single
|
||||||
characters, though some of these can be qualified by further characters.
|
characters, though some of these can be qualified by further characters.
|
||||||
@ -323,6 +352,7 @@ fall into several groups that are described in detail in the following
|
|||||||
sections.
|
sections.
|
||||||
<pre>
|
<pre>
|
||||||
<b>/8</b> set UTF mode
|
<b>/8</b> set UTF mode
|
||||||
|
<b>/9</b> set PCRE_NEVER_UTF (locks out UTF mode)
|
||||||
<b>/?</b> disable UTF validity check
|
<b>/?</b> disable UTF validity check
|
||||||
<b>/+</b> show remainder of subject after match
|
<b>/+</b> show remainder of subject after match
|
||||||
<b>/=</b> show all captures (not just those that are set)
|
<b>/=</b> show all captures (not just those that are set)
|
||||||
@ -344,7 +374,9 @@ sections.
|
|||||||
<b>/M</b> show compiled memory size
|
<b>/M</b> show compiled memory size
|
||||||
<b>/m</b> set PCRE_MULTILINE
|
<b>/m</b> set PCRE_MULTILINE
|
||||||
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
||||||
|
<b>/O</b> set PCRE_NO_AUTO_POSSESS
|
||||||
<b>/P</b> use the POSIX wrapper
|
<b>/P</b> use the POSIX wrapper
|
||||||
|
<b>/Q</b> test external stack check function
|
||||||
<b>/S</b> study the pattern after compilation
|
<b>/S</b> study the pattern after compilation
|
||||||
<b>/s</b> set PCRE_DOTALL
|
<b>/s</b> set PCRE_DOTALL
|
||||||
<b>/T</b> select character tables
|
<b>/T</b> select character tables
|
||||||
@ -395,12 +427,14 @@ options that do not correspond to anything in Perl:
|
|||||||
<b>/8</b> PCRE_UTF32 ) when using the 32-bit
|
<b>/8</b> PCRE_UTF32 ) when using the 32-bit
|
||||||
<b>/?</b> PCRE_NO_UTF32_CHECK ) library
|
<b>/?</b> PCRE_NO_UTF32_CHECK ) library
|
||||||
|
|
||||||
|
<b>/9</b> PCRE_NEVER_UTF
|
||||||
<b>/A</b> PCRE_ANCHORED
|
<b>/A</b> PCRE_ANCHORED
|
||||||
<b>/C</b> PCRE_AUTO_CALLOUT
|
<b>/C</b> PCRE_AUTO_CALLOUT
|
||||||
<b>/E</b> PCRE_DOLLAR_ENDONLY
|
<b>/E</b> PCRE_DOLLAR_ENDONLY
|
||||||
<b>/f</b> PCRE_FIRSTLINE
|
<b>/f</b> PCRE_FIRSTLINE
|
||||||
<b>/J</b> PCRE_DUPNAMES
|
<b>/J</b> PCRE_DUPNAMES
|
||||||
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
||||||
|
<b>/O</b> PCRE_NO_AUTO_POSSESS
|
||||||
<b>/U</b> PCRE_UNGREEDY
|
<b>/U</b> PCRE_UNGREEDY
|
||||||
<b>/W</b> PCRE_UCP
|
<b>/W</b> PCRE_UCP
|
||||||
<b>/X</b> PCRE_EXTRA
|
<b>/X</b> PCRE_EXTRA
|
||||||
@ -504,7 +538,10 @@ below.
|
|||||||
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
|
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
|
||||||
compiled pattern (whether it is anchored, has a fixed first character, and
|
compiled pattern (whether it is anchored, has a fixed first character, and
|
||||||
so on). It does this by calling <b>pcre[16|32]_fullinfo()</b> after compiling a
|
so on). It does this by calling <b>pcre[16|32]_fullinfo()</b> after compiling a
|
||||||
pattern. If the pattern is studied, the results of that are also output.
|
pattern. If the pattern is studied, the results of that are also output. In
|
||||||
|
this output, the word "char" means a non-UTF character, that is, the value of a
|
||||||
|
single data item (8-bit, 16-bit, or 32-bit, depending on the library that is
|
||||||
|
being tested).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>/K</b> modifier requests <b>pcretest</b> to show names from backtracking
|
The <b>/K</b> modifier requests <b>pcretest</b> to show names from backtracking
|
||||||
@ -538,14 +575,22 @@ successfully studied with the PCRE_STUDY_JIT_COMPILE option, the size of the
|
|||||||
JIT compiled code is also output.
|
JIT compiled code is also output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The <b>/Q</b> modifier is used to test the use of <b>pcre_stack_guard</b>. It
|
||||||
|
must be followed by '0' or '1', specifying the return code to be given from an
|
||||||
|
external function that is passed to PCRE and used for stack checking during
|
||||||
|
compilation (see the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
documentation for details).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The <b>/S</b> modifier causes <b>pcre[16|32]_study()</b> to be called after the
|
The <b>/S</b> modifier causes <b>pcre[16|32]_study()</b> to be called after the
|
||||||
expression has been compiled, and the results used when the expression is
|
expression has been compiled, and the results used when the expression is
|
||||||
matched. There are a number of qualifying characters that may follow <b>/S</b>.
|
matched. There are a number of qualifying characters that may follow <b>/S</b>.
|
||||||
They may appear in any order.
|
They may appear in any order.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If <b>S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is called
|
If <b>/S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is
|
||||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||||
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -624,7 +669,38 @@ function:
|
|||||||
The <b>/+</b> modifier works as described above. All other modifiers are
|
The <b>/+</b> modifier works as described above. All other modifiers are
|
||||||
ignored.
|
ignored.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">DATA LINES</a><br>
|
<br><b>
|
||||||
|
Locking out certain modifiers
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
PCRE can be compiled with or without support for certain features such as
|
||||||
|
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||||
|
into a number of different files that are selected for running depending on
|
||||||
|
which features are available. When updating the tests, it is all too easy to
|
||||||
|
put a new test into the wrong file by mistake; for example, to put a test that
|
||||||
|
requires UTF support into a file that is used when it is not available. To help
|
||||||
|
detect such mistakes as early as possible, there is a facility for locking out
|
||||||
|
specific modifiers. If an input line for <b>pcretest</b> starts with the string
|
||||||
|
"< forbid " the following sequence of characters is taken as a list of
|
||||||
|
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||||
|
Unicode property support, this line appears:
|
||||||
|
<pre>
|
||||||
|
< forbid 8W
|
||||||
|
</pre>
|
||||||
|
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||||
|
subsequently encountered. If the character string contains < but not >, all the
|
||||||
|
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||||
|
modifiers must be explicitly listed, for example:
|
||||||
|
<pre>
|
||||||
|
< forbid <JS><cr>
|
||||||
|
</pre>
|
||||||
|
There must be a single space between < and "forbid" for this feature to be
|
||||||
|
recognised. If there is not, the line is interpreted either as a request to
|
||||||
|
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||||
|
below) or, if there is another < character, as a pattern that uses < as its
|
||||||
|
delimiter.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
||||||
white space is removed, and it is then scanned for \ escapes. Some of these
|
white space is removed, and it is then scanned for \ escapes. Some of these
|
||||||
@ -644,6 +720,7 @@ recognized:
|
|||||||
\v vertical tab (\x0b)
|
\v vertical tab (\x0b)
|
||||||
\nnn octal character (up to 3 octal digits); always
|
\nnn octal character (up to 3 octal digits); always
|
||||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||||
|
\o{dd...} octal character (any number of octal digits)
|
||||||
\xhh hexadecimal byte (up to 2 hex digits)
|
\xhh hexadecimal byte (up to 2 hex digits)
|
||||||
\x{hh...} hexadecimal character (any number of hex digits)
|
\x{hh...} hexadecimal character (any number of hex digits)
|
||||||
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
||||||
@ -748,7 +825,7 @@ API to be used, the only option-setting sequences that have any effect are \B,
|
|||||||
\N, and \Z, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively,
|
\N, and \Z, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively,
|
||||||
to be passed to <b>regexec()</b>.
|
to be passed to <b>regexec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC8" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcretest</b> uses the standard PCRE matching function,
|
By default, <b>pcretest</b> uses the standard PCRE matching function,
|
||||||
<b>pcre[16|32]_exec()</b> to match each data line. PCRE also supports an
|
<b>pcre[16|32]_exec()</b> to match each data line. PCRE also supports an
|
||||||
@ -765,7 +842,7 @@ This function finds all possible matches at a given point. If, however, the \F
|
|||||||
escape sequence is present in the data line, it stops after the first match is
|
escape sequence is present in the data line, it stops after the first match is
|
||||||
found. This is always the shortest possible match.
|
found. This is always the shortest possible match.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
|
<br><a name="SEC9" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
|
||||||
<P>
|
<P>
|
||||||
This section describes the output when the normal matching function,
|
This section describes the output when the normal matching function,
|
||||||
<b>pcre[16|32]_exec()</b>, is being used.
|
<b>pcre[16|32]_exec()</b>, is being used.
|
||||||
@ -856,7 +933,7 @@ prompt is used for continuations), data lines may not. However newlines can be
|
|||||||
included in data by means of the \n escape (or \r, \r\n, etc., depending on
|
included in data by means of the \n escape (or \r, \r\n, etc., depending on
|
||||||
the newline sequence setting).
|
the newline sequence setting).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC10" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
When the alternative matching function, <b>pcre[16|32]_dfa_exec()</b>, is used (by
|
When the alternative matching function, <b>pcre[16|32]_dfa_exec()</b>, is used (by
|
||||||
means of the \D escape sequence or the <b>-dfa</b> command line option), the
|
means of the \D escape sequence or the <b>-dfa</b> command line option), the
|
||||||
@ -892,7 +969,7 @@ at the end of the longest match. For example:
|
|||||||
Since the matching function does not support substring capture, the escape
|
Since the matching function does not support substring capture, the escape
|
||||||
sequences that are concerned with captured substrings are not relevant.
|
sequences that are concerned with captured substrings are not relevant.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
|
<br><a name="SEC11" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
|
||||||
<P>
|
<P>
|
||||||
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
|
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
|
||||||
indicating that the subject partially matched the pattern, you can restart the
|
indicating that the subject partially matched the pattern, you can restart the
|
||||||
@ -909,7 +986,7 @@ For further information about partial matching, see the
|
|||||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC12" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
|
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
|
||||||
is called during matching. This works with both matching functions. By default,
|
is called during matching. This works with both matching functions. By default,
|
||||||
@ -970,7 +1047,7 @@ the
|
|||||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
<br><a name="SEC13" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
|
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
|
||||||
bytes other than 32-126 are always treated as non-printing characters and are
|
bytes other than 32-126 are always treated as non-printing characters and are
|
||||||
@ -982,7 +1059,7 @@ string, it behaves in the same way, unless a different locale has been set for
|
|||||||
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
|
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
|
||||||
function is used to distinguish printing and non-printing characters.
|
function is used to distinguish printing and non-printing characters.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
|
<br><a name="SEC14" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
The facilities described in this section are not available when the POSIX
|
The facilities described in this section are not available when the POSIX
|
||||||
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
|
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
|
||||||
@ -1013,10 +1090,9 @@ writing the file, <b>pcretest</b> expects to read a new pattern.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
||||||
name instead of a pattern. The name of the file must not contain a < character,
|
name instead of a pattern. There must be no space between < and the file name,
|
||||||
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by <
|
which must not contain a < character, as otherwise <b>pcretest</b> will
|
||||||
characters.
|
interpret the line as a pattern delimited by < characters. For example:
|
||||||
For example:
|
|
||||||
<pre>
|
<pre>
|
||||||
re> </some/file
|
re> </some/file
|
||||||
Compiled pattern loaded from /some/file
|
Compiled pattern loaded from /some/file
|
||||||
@ -1055,14 +1131,14 @@ string using a reloaded pattern is likely to cause <b>pcretest</b> to crash.
|
|||||||
Finally, if you attempt to load a file that is not in the correct format, the
|
Finally, if you attempt to load a file that is not in the correct format, the
|
||||||
result is undefined.
|
result is undefined.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC15" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcreapi</b>(3),
|
<b>pcre</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcreapi</b>(3),
|
||||||
<b>pcrecallout</b>(3),
|
<b>pcrecallout</b>(3),
|
||||||
<b>pcrejit</b>(3), <b>pcrematching</b>(3), <b>pcrepartial</b>(3),
|
<b>pcrejit</b>(3), <b>pcrematching</b>(3), <b>pcrepartial</b>(3),
|
||||||
<b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
|
<b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC16" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -1071,11 +1147,11 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 10 September 2012
|
Last updated: 09 February 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -85,7 +85,9 @@ place. From release 7.3 of PCRE, the check is according the rules of RFC 3629,
|
|||||||
which are themselves derived from the Unicode specification. Earlier releases
|
which are themselves derived from the Unicode specification. Earlier releases
|
||||||
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
|
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
|
||||||
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
|
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
|
||||||
to U+10FFFF, excluding the surrogate area and the non-characters.
|
to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called
|
||||||
|
"non-character" code points are no longer excluded because Unicode corrigendum
|
||||||
|
#9 makes it clear that they should not be.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||||
@ -96,10 +98,6 @@ surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
|
|||||||
UTF-32.)
|
UTF-32.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Also excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
|
||||||
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
|
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
|
||||||
compile time, the only additional information is the offset to the first byte
|
compile time, the only additional information is the offset to the first byte
|
||||||
of the failing character. The run-time functions <b>pcre_exec()</b> and
|
of the failing character. The run-time functions <b>pcre_exec()</b> and
|
||||||
@ -135,10 +133,6 @@ U+D800 to U+DFFF are independent code points. Values in the surrogate range
|
|||||||
must be used in pairs in the correct manner.
|
must be used in pairs in the correct manner.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
|
||||||
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
|
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
|
||||||
compile time, the only additional information is the offset to the first data
|
compile time, the only additional information is the offset to the first data
|
||||||
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
|
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
|
||||||
@ -160,9 +154,7 @@ Validity of UTF-32 strings
|
|||||||
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
|
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
|
||||||
passed as patterns and subjects are (by default) checked for validity on entry
|
passed as patterns and subjects are (by default) checked for validity on entry
|
||||||
to the relevant functions. This check allows only values in the range U+0
|
to the relevant functions. This check allows only values in the range U+0
|
||||||
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and the
|
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF.
|
||||||
"Non-Character" code points, which are U+FDD0 to U+FDEF and the last two
|
|
||||||
characters in each plane, U+??FFFE and U+??FFFF.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
|
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
|
||||||
@ -261,9 +253,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 November 2012
|
Last updated: 27 February 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -11,27 +11,29 @@
|
|||||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||||
<p>
|
<p>
|
||||||
The HTML documentation for PCRE comprises the following pages:
|
The HTML documentation for PCRE consists of a number of pages that are listed
|
||||||
|
below in alphabetical order. If you are new to PCRE, please read the first one
|
||||||
|
first.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><td><a href="pcre.html">pcre</a></td>
|
<tr><td><a href="pcre.html">pcre</a></td>
|
||||||
<td> Introductory page</td></tr>
|
<td> Introductory page</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||||
|
<td> Information about the installation configuration</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre16.html">pcre16</a></td>
|
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||||
<td> Discussion of the 16-bit PCRE library</td></tr>
|
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre32.html">pcre32</a></td>
|
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||||
<td> Discussion of the 32-bit PCRE library</td></tr>
|
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
|
||||||
<td> Information about the installation configuration</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||||
<td> PCRE's native API</td></tr>
|
<td> PCRE's native API</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||||
<td> Options for building PCRE</td></tr>
|
<td> Building PCRE</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||||
<td> The <i>callout</i> facility</td></tr>
|
<td> The <i>callout</i> facility</td></tr>
|
||||||
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||||||
<td> Some comments on performance</td></tr>
|
<td> Some comments on performance</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||||
<td> The POSIX API to the PCRE library</td></tr>
|
<td> The POSIX API to the PCRE 8-bit library</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||||
<td> How to save and re-use compiled patterns</td></tr>
|
<td> How to save and re-use compiled patterns</td></tr>
|
||||||
@ -118,13 +120,13 @@ functions.
|
|||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
|
||||||
<td> Free study data</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||||
<td> Match a compiled pattern to a subject string
|
<td> Match a compiled pattern to a subject string
|
||||||
(Perl compatible)</td></tr>
|
(Perl compatible)</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||||
|
<td> Free study data</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||||
<td> Free extracted substring</td></tr>
|
<td> Free extracted substring</td></tr>
|
||||||
|
|
||||||
@ -140,14 +142,17 @@ functions.
|
|||||||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||||
<td> Convert captured string name to number</td></tr>
|
<td> Convert captured string name to number</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
|
||||||
|
<td> Find table entries for given string name</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||||
<td> Extract numbered substring into new memory</td></tr>
|
<td> Extract numbered substring into new memory</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||||
<td> Extract all substrings into new memory</td></tr>
|
<td> Extract all substrings into new memory</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
|
||||||
<td> Obsolete information extraction function</td></tr>
|
<td> Fast path interface to JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||||
<td> Create a stack for JIT matching</td></tr>
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
@ -4,11 +4,11 @@ pcre-config - program to return PCRE configuration
|
|||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||||
.ti +5n
|
.B " [--libs16] [--libs32] [--libs-cpp] [--libs-posix]"
|
||||||
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
.B " [--cflags] [--cflags-posix]"
|
||||||
.ti +5n
|
.fi
|
||||||
.B [--cflags] [--cflags-posix]
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
PCRE-CONFIG(1) PCRE-CONFIG(1)
|
PCRE-CONFIG(1) General Commands Manual PCRE-CONFIG(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE 3 "11 November 2012" "PCRE 8.32"
|
.TH PCRE 3 "08 January 2014" "PCRE 8.35"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
@ -19,9 +19,9 @@ built. The majority of the work to make this possible was done by Zoltan
|
|||||||
Herczeg.
|
Herczeg.
|
||||||
.P
|
.P
|
||||||
Starting with release 8.32 it is possible to compile a third separate PCRE
|
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||||
library, which supports 32-bit character strings (including
|
library that supports 32-bit character strings (including UTF-32 strings). The
|
||||||
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
build process allows any combination of the 8-, 16- and 32-bit libraries. The
|
||||||
libraries. The work to make this possible was done by Christian Persch.
|
work to make this possible was done by Christian Persch.
|
||||||
.P
|
.P
|
||||||
The three libraries contain identical sets of functions, except that the names
|
The three libraries contain identical sets of functions, except that the names
|
||||||
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
|
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
|
||||||
@ -44,7 +44,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 6.2.0.
|
release 6.3.0.
|
||||||
.P
|
.P
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
alternative function that matches the same compiled patterns in a different
|
alternative function that matches the same compiled patterns in a different
|
||||||
@ -68,6 +68,7 @@ in the \fIContrib\fP directory at the primary FTP site, which is:
|
|||||||
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
|
||||||
|
.\"
|
||||||
.P
|
.P
|
||||||
Details of exactly which Perl regular expression features are and are not
|
Details of exactly which Perl regular expression features are and are not
|
||||||
supported by PCRE are given in separate documents. See the
|
supported by PCRE are given in separate documents. See the
|
||||||
@ -95,8 +96,17 @@ available. The features themselves are described in the
|
|||||||
\fBpcrebuild\fP
|
\fBpcrebuild\fP
|
||||||
.\"
|
.\"
|
||||||
page. Documentation about building PCRE for various operating systems can be
|
page. Documentation about building PCRE for various operating systems can be
|
||||||
found in the \fBREADME\fP and \fBNON-AUTOTOOLS_BUILD\fP files in the source
|
found in the
|
||||||
distribution.
|
.\" HTML <a href="README.txt">
|
||||||
|
.\" </a>
|
||||||
|
\fBREADME\fP
|
||||||
|
.\"
|
||||||
|
and
|
||||||
|
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||||
|
.\" </a>
|
||||||
|
\fBNON-AUTOTOOLS_BUILD\fP
|
||||||
|
.\"
|
||||||
|
files in the source distribution.
|
||||||
.P
|
.P
|
||||||
The libraries contain a number of undocumented internal functions and data
|
The libraries contain a number of undocumented internal functions and data
|
||||||
tables that are used by more than one of the exported external functions, but
|
tables that are used by more than one of the exported external functions, but
|
||||||
@ -121,8 +131,11 @@ checked for UTF-8 validity. If the data string is very long, such a check might
|
|||||||
use sufficiently many resources as to cause your application to lose
|
use sufficiently many resources as to cause your application to lose
|
||||||
performance.
|
performance.
|
||||||
.P
|
.P
|
||||||
The best way of guarding against this possibility is to use the
|
One way of guarding against this possibility is to use the
|
||||||
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
|
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
|
||||||
|
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
|
||||||
|
compile time. This causes a compile time error if a pattern contains a
|
||||||
|
UTF-setting sequence.
|
||||||
.P
|
.P
|
||||||
If your application is one that supports UTF, be aware that validity checking
|
If your application is one that supports UTF, be aware that validity checking
|
||||||
can take time. If the same data string is to be matched many times, you can use
|
can take time. If the same data string is to be matched many times, you can use
|
||||||
@ -145,15 +158,18 @@ page.
|
|||||||
The user documentation for PCRE comprises a number of different sections. In
|
The user documentation for PCRE comprises a number of different sections. In
|
||||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||||
each is a separate page, linked from the index page. In the plain text format,
|
each is a separate page, linked from the index page. In the plain text format,
|
||||||
all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
|
the descriptions of the \fBpcregrep\fP and \fBpcretest\fP programs are in files
|
||||||
of searching. The sections are as follows:
|
called \fBpcregrep.txt\fP and \fBpcretest.txt\fP, respectively. The remaining
|
||||||
|
sections, except for the \fBpcredemo\fP section (which is a program listing),
|
||||||
|
are concatenated in \fBpcre.txt\fP, for ease of searching. The sections are as
|
||||||
|
follows:
|
||||||
.sp
|
.sp
|
||||||
pcre this document
|
pcre this document
|
||||||
|
pcre-config show PCRE installation configuration information
|
||||||
pcre16 details of the 16-bit library
|
pcre16 details of the 16-bit library
|
||||||
pcre32 details of the 32-bit library
|
pcre32 details of the 32-bit library
|
||||||
pcre-config show PCRE installation configuration information
|
|
||||||
pcreapi details of PCRE's native C API
|
pcreapi details of PCRE's native C API
|
||||||
pcrebuild options for building PCRE
|
pcrebuild building PCRE
|
||||||
pcrecallout details of the callout feature
|
pcrecallout details of the callout feature
|
||||||
pcrecompat discussion of Perl compatibility
|
pcrecompat discussion of Perl compatibility
|
||||||
pcrecpp details of the C++ wrapper for the 8-bit library
|
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||||
@ -175,8 +191,8 @@ of searching. The sections are as follows:
|
|||||||
pcretest description of the \fBpcretest\fP testing command
|
pcretest description of the \fBpcretest\fP testing command
|
||||||
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||||
.sp
|
.sp
|
||||||
In addition, in the "man" and HTML formats, there is a short page for each
|
In the "man" and HTML formats, there is also a short page for each C library
|
||||||
C library function, listing its arguments and results.
|
function, listing its arguments and results.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH AUTHOR
|
.SH AUTHOR
|
||||||
@ -197,6 +213,6 @@ two digits 10, at the domain cam.ac.uk.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 11 November 2012
|
Last updated: 08 January 2014
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.sp
|
.sp
|
||||||
@ -8,140 +8,120 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.SM
|
.nf
|
||||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B const unsigned char *pcre16_maketables(void);
|
.B const unsigned char *pcre16_maketables(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.PP
|
.sp
|
||||||
.B const char *pcre16_version(void);
|
.B const char *pcre16_version(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B void *(*pcre16_malloc)(size_t);
|
.B void *(*pcre16_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre16_free)(void *);
|
.B void (*pcre16_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B void *(*pcre16_stack_malloc)(size_t);
|
.B void *(*pcre16_stack_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre16_stack_free)(void *);
|
.B void (*pcre16_stack_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B int (*pcre16_callout)(pcre16_callout_block *);
|
.B int (*pcre16_callout)(pcre16_callout_block *);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "THE PCRE 16-BIT LIBRARY"
|
.SH "THE PCRE 16-BIT LIBRARY"
|
||||||
@ -246,8 +226,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||||||
.SH "SUBJECT STRING OFFSETS"
|
.SH "SUBJECT STRING OFFSETS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The offsets within subject strings that are returned by the matching functions
|
The lengths and starting offsets of subject strings must be specified in 16-bit
|
||||||
are in 16-bit units rather than bytes.
|
data units, and the offsets within subject strings that are returned by the
|
||||||
|
matching functions are also in 16-bit units rather than bytes.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "NAMED SUBPATTERNS"
|
.SH "NAMED SUBPATTERNS"
|
||||||
@ -385,6 +366,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 November 2012
|
Last updated: 12 May 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.sp
|
.sp
|
||||||
@ -8,140 +8,119 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.SM
|
.nf
|
||||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B const unsigned char *pcre32_maketables(void);
|
.B const unsigned char *pcre32_maketables(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.PP
|
.sp
|
||||||
.B const char *pcre32_version(void);
|
.B const char *pcre32_version(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B void *(*pcre32_malloc)(size_t);
|
.B void *(*pcre32_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre32_free)(void *);
|
.B void (*pcre32_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B void *(*pcre32_stack_malloc)(size_t);
|
.B void *(*pcre32_stack_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre32_stack_free)(void *);
|
.B void (*pcre32_stack_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B int (*pcre32_callout)(pcre32_callout_block *);
|
.B int (*pcre32_callout)(pcre32_callout_block *);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "THE PCRE 32-BIT LIBRARY"
|
.SH "THE PCRE 32-BIT LIBRARY"
|
||||||
@ -246,8 +225,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||||||
.SH "SUBJECT STRING OFFSETS"
|
.SH "SUBJECT STRING OFFSETS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The offsets within subject strings that are returned by the matching functions
|
The lengths and starting offsets of subject strings must be specified in 32-bit
|
||||||
are in 32-bit units rather than bytes.
|
data units, and the offsets within subject strings that are returned by the
|
||||||
|
matching functions are also in 32-bit units rather than bytes.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "NAMED SUBPATTERNS"
|
.SH "NAMED SUBPATTERNS"
|
||||||
@ -384,6 +364,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 November 2012
|
Last updated: 12 May 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -56,6 +51,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -64,6 +60,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -64,6 +56,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -72,6 +65,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
|
|||||||
target architecture for the JIT compiler,
|
target architecture for the JIT compiler,
|
||||||
or NULL if there is no JIT support
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
Internal recursion depth limit
|
Internal recursion depth limit
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_DFA_EXEC 3 "12 May 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -44,16 +36,17 @@ are:
|
|||||||
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
\fIsubject\fP Points to the subject string
|
\fIsubject\fP Points to the subject string
|
||||||
\fIlength\fP Length of the subject string, in bytes
|
\fIlength\fP Length of the subject string
|
||||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
\fIstartoffset\fP Offset in the subject at which to start matching
|
||||||
start matching
|
|
||||||
\fIoptions\fP Option bits
|
\fIoptions\fP Option bits
|
||||||
\fIovector\fP Points to a vector of ints for result offsets
|
\fIovector\fP Points to a vector of ints for result offsets
|
||||||
\fIovecsize\fP Number of elements in the vector
|
\fIovecsize\fP Number of elements in the vector
|
||||||
\fIworkspace\fP Points to a vector of ints used as working space
|
\fIworkspace\fP Points to a vector of ints used as working space
|
||||||
\fIwscount\fP Number of elements in the vector
|
\fIwscount\fP Number of elements in the vector
|
||||||
.sp
|
.sp
|
||||||
The options are:
|
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
|
||||||
|
\fBpcre_dfa_exec()\fP, 16-bit data items for \fBpcre16_dfa_exec()\fP, and 32-bit items
|
||||||
|
for \fBpcre32_dfa_exec()\fP. The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_EXEC 3 "12 May 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -36,14 +31,15 @@ offsets to captured substrings. Its arguments are:
|
|||||||
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||||
or is NULL
|
or is NULL
|
||||||
\fIsubject\fP Points to the subject string
|
\fIsubject\fP Points to the subject string
|
||||||
\fIlength\fP Length of the subject string, in bytes
|
\fIlength\fP Length of the subject string
|
||||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
\fIstartoffset\fP Offset in the subject at which to start matching
|
||||||
start matching
|
|
||||||
\fIoptions\fP Option bits
|
\fIoptions\fP Option bits
|
||||||
\fIovector\fP Points to a vector of ints for result offsets
|
\fIovector\fP Points to a vector of ints for result offsets
|
||||||
\fIovecsize\fP Number of elements in the vector (a multiple of 3)
|
\fIovecsize\fP Number of elements in the vector (a multiple of 3)
|
||||||
.sp
|
.sp
|
||||||
The options are:
|
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
|
||||||
|
\fBpcre_exec()\fP, 16-bit data items for \fBpcre16_exec()\fP, and 32-bit items
|
||||||
|
for \fBpcre32_exec()\fP. The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE_ANCHORED Match only at the first position
|
PCRE_ANCHORED Match only at the first position
|
||||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " const char **\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP);"
|
||||||
.B const char *\fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " const char **\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,24 +1,54 @@
|
|||||||
.TH PCREBUILD 3 "30 October 2012" "PCRE 8.32"
|
.TH PCREBUILD 3 "12 May 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SH "BUILDING PCRE"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
PCRE is distributed with a \fBconfigure\fP script that can be used to build the
|
||||||
|
library in Unix-like environments using the applications known as Autotools.
|
||||||
|
Also in the distribution are files to support building using \fBCMake\fP
|
||||||
|
instead of \fBconfigure\fP. The text file
|
||||||
|
.\" HTML <a href="README.txt">
|
||||||
|
.\" </a>
|
||||||
|
\fBREADME\fP
|
||||||
|
.\"
|
||||||
|
contains general information about building with Autotools (some of which is
|
||||||
|
repeated below), and also has some comments about building on various operating
|
||||||
|
systems. There is a lot more information about building PCRE without using
|
||||||
|
Autotools (including information about using \fBCMake\fP and building "by
|
||||||
|
hand") in the text file called
|
||||||
|
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||||
|
.\" </a>
|
||||||
|
\fBNON-AUTOTOOLS-BUILD\fP.
|
||||||
|
.\"
|
||||||
|
You should consult this file as well as the
|
||||||
|
.\" HTML <a href="README.txt">
|
||||||
|
.\" </a>
|
||||||
|
\fBREADME\fP
|
||||||
|
.\"
|
||||||
|
file if you are building in a non-Unix-like environment.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "PCRE BUILD-TIME OPTIONS"
|
.SH "PCRE BUILD-TIME OPTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This document describes the optional features of PCRE that can be selected when
|
The rest of this document describes the optional features of PCRE that can be
|
||||||
the library is compiled. It assumes use of the \fBconfigure\fP script, where
|
selected when the library is compiled. It assumes use of the \fBconfigure\fP
|
||||||
the optional features are selected or deselected by providing options to
|
script, where the optional features are selected or deselected by providing
|
||||||
\fBconfigure\fP before running the \fBmake\fP command. However, the same
|
options to \fBconfigure\fP before running the \fBmake\fP command. However, the
|
||||||
options can be selected in both Unix-like and non-Unix-like environments using
|
same options can be selected in both Unix-like and non-Unix-like environments
|
||||||
the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead of
|
using the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead
|
||||||
\fBconfigure\fP to build PCRE.
|
of \fBconfigure\fP to build PCRE.
|
||||||
.P
|
.P
|
||||||
There is a lot more information about building PCRE without using
|
If you are not using Autotools or \fBCMake\fP, option selection can be done by
|
||||||
\fBconfigure\fP (including information about using \fBCMake\fP or building "by
|
editing the \fBconfig.h\fP file, or by passing parameter settings to the
|
||||||
hand") in the file called \fINON-AUTOTOOLS-BUILD\fP, which is part of the PCRE
|
compiler, as described in
|
||||||
distribution. You should consult this file as well as the \fIREADME\fP file if
|
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||||
you are building in a non-Unix-like environment.
|
.\" </a>
|
||||||
|
\fBNON-AUTOTOOLS-BUILD\fP.
|
||||||
|
.\"
|
||||||
.P
|
.P
|
||||||
The complete list of options for \fBconfigure\fP (which includes the standard
|
The complete list of options for \fBconfigure\fP (which includes the standard
|
||||||
ones such as the selection of the installation directory) can be obtained by
|
ones such as the selection of the installation directory) can be obtained by
|
||||||
@ -45,7 +75,7 @@ strings, by adding
|
|||||||
.sp
|
.sp
|
||||||
--enable-pcre16
|
--enable-pcre16
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command. You can also build a separate
|
to the \fBconfigure\fP command. You can also build yet another separate
|
||||||
library, called \fBlibpcre32\fP, in which strings are contained in vectors of
|
library, called \fBlibpcre32\fP, in which strings are contained in vectors of
|
||||||
32-bit data units and interpreted either as single-unit characters or UTF-32
|
32-bit data units and interpreted either as single-unit characters or UTF-32
|
||||||
strings, by adding
|
strings, by adding
|
||||||
@ -65,8 +95,8 @@ an 8-bit program. None of these are built if you select only the 16-bit or
|
|||||||
.SH "BUILDING SHARED AND STATIC LIBRARIES"
|
.SH "BUILDING SHARED AND STATIC LIBRARIES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The PCRE building process uses \fBlibtool\fP to build both shared and static
|
The Autotools PCRE building process uses \fBlibtool\fP to build both shared and
|
||||||
Unix libraries by default. You can suppress one of these by adding one of
|
static libraries by default. You can suppress one of these by adding one of
|
||||||
.sp
|
.sp
|
||||||
--disable-shared
|
--disable-shared
|
||||||
--disable-static
|
--disable-static
|
||||||
@ -515,6 +545,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 30 October 2012
|
Last updated: 12 May 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRECALLOUT 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -41,26 +41,64 @@ it is processed as if it were
|
|||||||
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||||
.sp
|
.sp
|
||||||
Notice that there is a callout before and after each parenthesis and
|
Notice that there is a callout before and after each parenthesis and
|
||||||
alternation bar. Automatic callouts can be used for tracking the progress of
|
alternation bar. If the pattern contains a conditional group whose condition is
|
||||||
pattern matching. The
|
an assertion, an automatic callout is inserted immediately before the
|
||||||
|
condition. Such a callout may also be inserted explicitly, for example:
|
||||||
|
.sp
|
||||||
|
(?(?C9)(?=a)ab|de)
|
||||||
|
.sp
|
||||||
|
This applies only to assertion conditions (because they are themselves
|
||||||
|
independent groups).
|
||||||
|
.P
|
||||||
|
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||||
|
The
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcretest\fP
|
\fBpcretest\fP
|
||||||
.\"
|
.\"
|
||||||
command has an option that sets automatic callouts; when it is used, the output
|
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||||
indicates how the pattern is matched. This is useful information when you are
|
used, the output indicates how the pattern is being matched. This is useful
|
||||||
trying to optimize the performance of a particular pattern.
|
information when you are trying to optimize the performance of a particular
|
||||||
.P
|
pattern.
|
||||||
The use of callouts in a pattern makes it ineligible for optimization by the
|
|
||||||
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
|
|
||||||
option always fails.
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "MISSING CALLOUTS"
|
.SH "MISSING CALLOUTS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
You should be aware that, because of optimizations in the way PCRE matches
|
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||||
patterns by default, callouts sometimes do not happen. For example, if the
|
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||||
pattern is
|
.P
|
||||||
|
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||||
|
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||||
|
if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and
|
||||||
|
then applied with automatic callouts to the string "aaaa" is:
|
||||||
|
.sp
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
No match
|
||||||
|
.sp
|
||||||
|
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||||
|
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||||
|
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||||
|
to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||||
|
this is done in \fBpcretest\fP (using the /O qualifier), the output changes to
|
||||||
|
this:
|
||||||
|
.sp
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^^ [bc]
|
||||||
|
No match
|
||||||
|
.sp
|
||||||
|
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||||
|
again, repeatedly, until a+ itself fails.
|
||||||
|
.P
|
||||||
|
Other optimizations that provide fast "no match" results also affect callouts.
|
||||||
|
For example, if the pattern is
|
||||||
.sp
|
.sp
|
||||||
ab(?C4)cd
|
ab(?C4)cd
|
||||||
.sp
|
.sp
|
||||||
@ -84,11 +122,11 @@ callouts such as the example above are obeyed.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
During matching, when PCRE reaches a callout point, the external function
|
During matching, when PCRE reaches a callout point, the external function
|
||||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
|
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is
|
||||||
(if it is set). This applies to both normal and DFA matching. The only
|
set). This applies to both normal and DFA matching. The only argument to the
|
||||||
argument to the callout function is a pointer to a \fBpcre_callout\fP
|
callout function is a pointer to a \fBpcre_callout\fP or
|
||||||
or \fBpcre[16|32]_callout\fP block.
|
\fBpcre[16|32]_callout\fP block. These structures contain the following
|
||||||
These structures contain the following fields:
|
fields:
|
||||||
.sp
|
.sp
|
||||||
int \fIversion\fP;
|
int \fIversion\fP;
|
||||||
int \fIcallout_number\fP;
|
int \fIcallout_number\fP;
|
||||||
@ -119,10 +157,10 @@ automatically generated callouts).
|
|||||||
.P
|
.P
|
||||||
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
|
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
|
||||||
passed by the caller to the matching function. When \fBpcre_exec()\fP or
|
passed by the caller to the matching function. When \fBpcre_exec()\fP or
|
||||||
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
|
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to
|
||||||
substrings that have been matched so far, in the same way as for extracting
|
extract substrings that have been matched so far, in the same way as for
|
||||||
substrings after a match has completed. For the DFA matching functions, this
|
extracting substrings after a match has completed. For the DFA matching
|
||||||
field is not useful.
|
functions, this field is not useful.
|
||||||
.P
|
.P
|
||||||
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
|
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
|
||||||
that were passed to the matching function.
|
that were passed to the matching function.
|
||||||
@ -144,8 +182,10 @@ value of \fIcapture_top\fP is one. This is always the case when the DFA
|
|||||||
functions are used, because they do not support captured substrings.
|
functions are used, because they do not support captured substrings.
|
||||||
.P
|
.P
|
||||||
The \fIcapture_last\fP field contains the number of the most recently captured
|
The \fIcapture_last\fP field contains the number of the most recently captured
|
||||||
substring. If no substrings have been captured, its value is -1. This is always
|
substring. However, when a recursion exits, the value reverts to what it was
|
||||||
the case for the DFA matching functions.
|
outside the recursion, as do the values of all captured substrings. If no
|
||||||
|
substrings have been captured, the value of \fIcapture_last\fP is -1. This is
|
||||||
|
always the case for the DFA matching functions.
|
||||||
.P
|
.P
|
||||||
The \fIcallout_data\fP field contains a value that is passed to a matching
|
The \fIcallout_data\fP field contains a value that is passed to a matching
|
||||||
function specifically so that it can be passed back in callouts. It is passed
|
function specifically so that it can be passed back in callouts. It is passed
|
||||||
@ -173,11 +213,12 @@ help in distinguishing between different automatic callouts, which all have the
|
|||||||
same callout number. However, they are set for all callouts.
|
same callout number. However, they are set for all callouts.
|
||||||
.P
|
.P
|
||||||
The \fImark\fP field is present from version 2 of the callout structure. In
|
The \fImark\fP field is present from version 2 of the callout structure. In
|
||||||
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
|
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a
|
||||||
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
pointer to the zero-terminated name of the most recently passed (*MARK),
|
||||||
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
|
||||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
||||||
callouts from the DFA matching functions this field always contains NULL.
|
previous (*MARK). In callouts from the DFA matching functions this field always
|
||||||
|
contains NULL.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "RETURN VALUES"
|
.SH "RETURN VALUES"
|
||||||
@ -209,6 +250,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 24 June 2012
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRECOMPAT 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRECOMPAT 3 "10 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
|
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
|
||||||
@ -23,10 +23,8 @@ just once). Perl allows repeat quantifiers on other assertions such as \eb, but
|
|||||||
these do not seem to have any use.
|
these do not seem to have any use.
|
||||||
.P
|
.P
|
||||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||||
counted, but their entries in the offsets vector are never set. Perl sets its
|
counted, but their entries in the offsets vector are never set. Perl sometimes
|
||||||
numerical variables from any such patterns that are matched before the
|
(but not always) sets its numerical variables from inside negative assertions.
|
||||||
assertion fails to match something (thereby succeeding), but only if the
|
|
||||||
negative lookahead assertion contains just one branch.
|
|
||||||
.P
|
.P
|
||||||
4. Though binary zero characters are supported in the subject string, they are
|
4. Though binary zero characters are supported in the subject string, they are
|
||||||
not allowed in a pattern string because it is passed as a normal C string,
|
not allowed in a pattern string because it is passed as a normal C string,
|
||||||
@ -91,22 +89,28 @@ in the
|
|||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
.P
|
.P
|
||||||
10. If any of the backtracking control verbs are used in an assertion or in a
|
10. If any of the backtracking control verbs are used in a subpattern that is
|
||||||
subpattern that is called as a subroutine (whether or not recursively), their
|
called as a subroutine (whether or not recursively), their effect is confined
|
||||||
effect is confined to that subpattern; it does not extend to the surrounding
|
to that subpattern; it does not extend to the surrounding pattern. This is not
|
||||||
pattern. This is not always the case in Perl. In particular, if (*THEN) is
|
always the case in Perl. In particular, if (*THEN) is present in a group that
|
||||||
present in a group that is called as a subroutine, its action is limited to
|
is called as a subroutine, its action is limited to that group, even if the
|
||||||
that group, even if the group does not contain any | characters. There is one
|
group does not contain any | characters. Note that such subpatterns are
|
||||||
exception to this: the name from a *(MARK), (*PRUNE), or (*THEN) that is
|
processed as anchored at the point where they are tested.
|
||||||
encountered in a successful positive assertion \fIis\fP passed back when a
|
|
||||||
match succeeds (compare capturing parentheses in assertions). Note that such
|
|
||||||
subpatterns are processed as anchored at the point where they are tested.
|
|
||||||
.P
|
.P
|
||||||
11. There are some differences that are concerned with the settings of captured
|
11. If a pattern contains more than one backtracking control verb, the first
|
||||||
|
one that is backtracked onto acts. For example, in the pattern
|
||||||
|
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||||
|
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||||
|
same as PCRE, but there are examples where it differs.
|
||||||
|
.P
|
||||||
|
12. Most backtracking verbs in assertions have their normal actions. They are
|
||||||
|
not confined to the assertion.
|
||||||
|
.P
|
||||||
|
13. There are some differences that are concerned with the settings of captured
|
||||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
||||||
.P
|
.P
|
||||||
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
||||||
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
||||||
works internally just with numbers, using an external table to translate
|
works internally just with numbers, using an external table to translate
|
||||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||||
@ -116,12 +120,23 @@ would not be possible to distinguish which parentheses matched, because both
|
|||||||
names map to capturing subpattern number 1. To avoid this confusing situation,
|
names map to capturing subpattern number 1. To avoid this confusing situation,
|
||||||
an error is given at compile time.
|
an error is given at compile time.
|
||||||
.P
|
.P
|
||||||
13. Perl recognizes comments in some places that PCRE does not, for example,
|
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||||
PCRE_EXTENDED option is set.
|
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||||
.P
|
.P
|
||||||
14. PCRE provides some extensions to the Perl regular expression facilities.
|
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||||
|
warning features, so it gives an error in these cases because they are almost
|
||||||
|
certainly user mistakes.
|
||||||
|
.P
|
||||||
|
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||||
|
affected when case-independent matching is specified. For example, \ep{Lu}
|
||||||
|
always matches an upper case letter. I think Perl has changed in this respect;
|
||||||
|
in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
|
||||||
|
letters, regardless of case, when case independence is specified.
|
||||||
|
.P
|
||||||
|
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||||
is with respect to Perl 5.10:
|
is with respect to Perl 5.10:
|
||||||
@ -180,6 +195,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 25 August 2012
|
Last updated: 10 November 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
424
tools/pcre/doc/pcredemo.3
Normal file
424
tools/pcre/doc/pcredemo.3
Normal file
@ -0,0 +1,424 @@
|
|||||||
|
.\" Start example.
|
||||||
|
.de EX
|
||||||
|
. nr mE \\n(.f
|
||||||
|
. nf
|
||||||
|
. nh
|
||||||
|
. ft CW
|
||||||
|
..
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.\" End example.
|
||||||
|
.de EE
|
||||||
|
. ft \\n(mE
|
||||||
|
. fi
|
||||||
|
. hy \\n(HY
|
||||||
|
..
|
||||||
|
.
|
||||||
|
.EX
|
||||||
|
/*************************************************
|
||||||
|
* PCRE DEMONSTRATION PROGRAM *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is a demonstration program to illustrate the most straightforward ways
|
||||||
|
of calling the PCRE regular expression library from a C program. See the
|
||||||
|
pcresample documentation for a short discussion ("man pcresample" if you have
|
||||||
|
the PCRE man pages installed).
|
||||||
|
|
||||||
|
In Unix-like environments, if PCRE is installed in your standard system
|
||||||
|
libraries, you should be able to compile this program using this command:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c -lpcre -o pcredemo
|
||||||
|
|
||||||
|
If PCRE is not installed in a standard place, it is likely to be installed with
|
||||||
|
support for the pkg-config mechanism. If you have pkg-config, you can compile
|
||||||
|
this program using this command:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
|
||||||
|
|
||||||
|
If you do not have pkg-config, you may have to use this:
|
||||||
|
|
||||||
|
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e
|
||||||
|
-R/usr/local/lib -lpcre -o pcredemo
|
||||||
|
|
||||||
|
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||||
|
library files for PCRE are installed on your system. Only some operating
|
||||||
|
systems (e.g. Solaris) use the -R option.
|
||||||
|
|
||||||
|
Building under Windows:
|
||||||
|
|
||||||
|
If you want to statically link this program against a non-dll .a file, you must
|
||||||
|
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
|
||||||
|
pcre_free() exported functions will be declared __declspec(dllimport), with
|
||||||
|
unwanted results. So in this environment, uncomment the following line. */
|
||||||
|
|
||||||
|
/* #define PCRE_STATIC */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
|
||||||
|
#define OVECCOUNT 30 /* should be a multiple of 3 */
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
pcre *re;
|
||||||
|
const char *error;
|
||||||
|
char *pattern;
|
||||||
|
char *subject;
|
||||||
|
unsigned char *name_table;
|
||||||
|
unsigned int option_bits;
|
||||||
|
int erroffset;
|
||||||
|
int find_all;
|
||||||
|
int crlf_is_newline;
|
||||||
|
int namecount;
|
||||||
|
int name_entry_size;
|
||||||
|
int ovector[OVECCOUNT];
|
||||||
|
int subject_length;
|
||||||
|
int rc, i;
|
||||||
|
int utf8;
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* First, sort out the command line. There is only one possible option at *
|
||||||
|
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||||
|
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||||
|
* if the -g option is present. Apart from that, there must be exactly two *
|
||||||
|
* arguments. *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
find_all = 0;
|
||||||
|
for (i = 1; i < argc; i++)
|
||||||
|
{
|
||||||
|
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* After the options, we require exactly two arguments, which are the pattern,
|
||||||
|
and the subject string. */
|
||||||
|
|
||||||
|
if (argc - i != 2)
|
||||||
|
{
|
||||||
|
printf("Two arguments required: a regex and a subject string\en");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern = argv[i];
|
||||||
|
subject = argv[i+1];
|
||||||
|
subject_length = (int)strlen(subject);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* Now we are going to compile the regular expression pattern, and handle *
|
||||||
|
* any errors that are detected. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
re = pcre_compile(
|
||||||
|
pattern, /* the pattern */
|
||||||
|
0, /* default options */
|
||||||
|
&error, /* for error message */
|
||||||
|
&erroffset, /* for error offset */
|
||||||
|
NULL); /* use default character tables */
|
||||||
|
|
||||||
|
/* Compilation failed: print the error message and exit */
|
||||||
|
|
||||||
|
if (re == NULL)
|
||||||
|
{
|
||||||
|
printf("PCRE compilation failed at offset %d: %s\en", erroffset, error);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* If the compilation succeeded, we call PCRE again, in order to do a *
|
||||||
|
* pattern match against the subject string. This does just ONE match. If *
|
||||||
|
* further matching is needed, it will be done below. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
rc = pcre_exec(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
subject, /* the subject string */
|
||||||
|
subject_length, /* the length of the subject */
|
||||||
|
0, /* start at offset 0 in the subject */
|
||||||
|
0, /* default options */
|
||||||
|
ovector, /* output vector for substring information */
|
||||||
|
OVECCOUNT); /* number of elements in the output vector */
|
||||||
|
|
||||||
|
/* Matching failed: handle error cases */
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
switch(rc)
|
||||||
|
{
|
||||||
|
case PCRE_ERROR_NOMATCH: printf("No match\en"); break;
|
||||||
|
/*
|
||||||
|
Handle other special cases if you like
|
||||||
|
*/
|
||||||
|
default: printf("Matching error %d\en", rc); break;
|
||||||
|
}
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match succeeded */
|
||||||
|
|
||||||
|
printf("\enMatch succeeded at offset %d\en", ovector[0]);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* We have found the first match within the subject string. If the output *
|
||||||
|
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||||
|
* captured. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
/* The output vector wasn't big enough */
|
||||||
|
|
||||||
|
if (rc == 0)
|
||||||
|
{
|
||||||
|
rc = OVECCOUNT/3;
|
||||||
|
printf("ovector only has room for %d captured substrings\en", rc - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||||
|
application you might want to do things other than print them. */
|
||||||
|
|
||||||
|
for (i = 0; i < rc; i++)
|
||||||
|
{
|
||||||
|
char *substring_start = subject + ovector[2*i];
|
||||||
|
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||||
|
printf("%2d: %.*s\en", i, substring_length, substring_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* That concludes the basic part of this demonstration program. We have *
|
||||||
|
* compiled a pattern, and performed a single match. The code that follows *
|
||||||
|
* shows first how to access named substrings, and then how to code for *
|
||||||
|
* repeated matches on the same subject. *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/* See if there are any named substrings, and if so, show them by name. First
|
||||||
|
we have to extract the count of named parentheses from the pattern. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMECOUNT, /* number of named substrings */
|
||||||
|
&namecount); /* where to put the answer */
|
||||||
|
|
||||||
|
if (namecount <= 0) printf("No named substrings\en"); else
|
||||||
|
{
|
||||||
|
unsigned char *tabptr;
|
||||||
|
printf("Named substrings\en");
|
||||||
|
|
||||||
|
/* Before we can access the substrings, we must extract the table for
|
||||||
|
translating names to numbers, and the size of each entry in the table. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMETABLE, /* address of the table */
|
||||||
|
&name_table); /* where to put the answer */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||||
|
&name_entry_size); /* where to put the answer */
|
||||||
|
|
||||||
|
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||||
|
and the substring itself. */
|
||||||
|
|
||||||
|
tabptr = name_table;
|
||||||
|
for (i = 0; i < namecount; i++)
|
||||||
|
{
|
||||||
|
int n = (tabptr[0] << 8) | tabptr[1];
|
||||||
|
printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
|
||||||
|
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||||
|
tabptr += name_entry_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
* If the "-g" option was given on the command line, we want to continue *
|
||||||
|
* to search for additional matches in the subject string, in a similar *
|
||||||
|
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||||
|
* might think because of the possibility of matching an empty string. *
|
||||||
|
* What happens is as follows: *
|
||||||
|
* *
|
||||||
|
* If the previous match was NOT for an empty string, we can just start *
|
||||||
|
* the next match at the end of the previous one. *
|
||||||
|
* *
|
||||||
|
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||||
|
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
|
||||||
|
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
|
||||||
|
* The first of these tells PCRE that an empty string at the start of the *
|
||||||
|
* subject is not a valid match; other possibilities must be tried. The *
|
||||||
|
* second flag restricts PCRE to one match attempt at the initial string *
|
||||||
|
* position. If this match succeeds, an alternative to the empty string *
|
||||||
|
* match has been found, and we can print it and proceed round the loop, *
|
||||||
|
* advancing by the length of whatever was found. If this match does not *
|
||||||
|
* succeed, we still stay in the loop, advancing by just one character. *
|
||||||
|
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
|
||||||
|
* more than one byte. *
|
||||||
|
* *
|
||||||
|
* However, there is a complication concerned with newlines. When the *
|
||||||
|
* newline convention is such that CRLF is a valid newline, we must *
|
||||||
|
* advance by two characters rather than one. The newline convention can *
|
||||||
|
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
if (!find_all) /* Check for -g */
|
||||||
|
{
|
||||||
|
pcre_free(re); /* Release the memory used for the compiled pattern */
|
||||||
|
return 0; /* Finish unless -g was given */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||||
|
sequence. First, find the options with which the regex was compiled; extract
|
||||||
|
the UTF-8 state, and mask off all but the newline options. */
|
||||||
|
|
||||||
|
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
|
||||||
|
utf8 = option_bits & PCRE_UTF8;
|
||||||
|
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
|
||||||
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
|
/* If no newline options were set, find the default newline convention from the
|
||||||
|
build configuration. */
|
||||||
|
|
||||||
|
if (option_bits == 0)
|
||||||
|
{
|
||||||
|
int d;
|
||||||
|
(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
|
||||||
|
/* Note that these values are always the ASCII ones, even in
|
||||||
|
EBCDIC environments. CR = 13, NL = 10. */
|
||||||
|
option_bits = (d == 13)? PCRE_NEWLINE_CR :
|
||||||
|
(d == 10)? PCRE_NEWLINE_LF :
|
||||||
|
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
|
||||||
|
(d == -2)? PCRE_NEWLINE_ANYCRLF :
|
||||||
|
(d == -1)? PCRE_NEWLINE_ANY : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See if CRLF is a valid newline sequence. */
|
||||||
|
|
||||||
|
crlf_is_newline =
|
||||||
|
option_bits == PCRE_NEWLINE_ANY ||
|
||||||
|
option_bits == PCRE_NEWLINE_CRLF ||
|
||||||
|
option_bits == PCRE_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
|
/* Loop for second and subsequent matches */
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int options = 0; /* Normally no options */
|
||||||
|
int start_offset = ovector[1]; /* Start at end of previous match */
|
||||||
|
|
||||||
|
/* If the previous match was for an empty string, we are finished if we are
|
||||||
|
at the end of the subject. Otherwise, arrange to run another match at the
|
||||||
|
same point to see if a non-empty match can be found. */
|
||||||
|
|
||||||
|
if (ovector[0] == ovector[1])
|
||||||
|
{
|
||||||
|
if (ovector[0] == subject_length) break;
|
||||||
|
options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run the next matching operation */
|
||||||
|
|
||||||
|
rc = pcre_exec(
|
||||||
|
re, /* the compiled pattern */
|
||||||
|
NULL, /* no extra data - we didn't study the pattern */
|
||||||
|
subject, /* the subject string */
|
||||||
|
subject_length, /* the length of the subject */
|
||||||
|
start_offset, /* starting offset in the subject */
|
||||||
|
options, /* options */
|
||||||
|
ovector, /* output vector for substring information */
|
||||||
|
OVECCOUNT); /* number of elements in the output vector */
|
||||||
|
|
||||||
|
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||||
|
is zero, it just means we have found all possible matches, so the loop ends.
|
||||||
|
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||||
|
point where there was a previous empty-string match. In this case, we do what
|
||||||
|
Perl does: advance the matching position by one character, and continue. We
|
||||||
|
do this by setting the "end of previous match" offset, because that is picked
|
||||||
|
up at the top of the loop as the point at which to start again.
|
||||||
|
|
||||||
|
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||||
|
the current position is just before it, advance by an extra byte. (b)
|
||||||
|
Otherwise we must ensure that we skip an entire UTF-8 character if we are in
|
||||||
|
UTF-8 mode. */
|
||||||
|
|
||||||
|
if (rc == PCRE_ERROR_NOMATCH)
|
||||||
|
{
|
||||||
|
if (options == 0) break; /* All matches found */
|
||||||
|
ovector[1] = start_offset + 1; /* Advance one byte */
|
||||||
|
if (crlf_is_newline && /* If CRLF is newline & */
|
||||||
|
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||||
|
subject[start_offset] == '\er' &&
|
||||||
|
subject[start_offset + 1] == '\en')
|
||||||
|
ovector[1] += 1; /* Advance by one more. */
|
||||||
|
else if (utf8) /* Otherwise, ensure we */
|
||||||
|
{ /* advance a whole UTF-8 */
|
||||||
|
while (ovector[1] < subject_length) /* character. */
|
||||||
|
{
|
||||||
|
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||||
|
ovector[1] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue; /* Go round the loop again */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Other matching errors are not recoverable. */
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
printf("Matching error %d\en", rc);
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match succeeded */
|
||||||
|
|
||||||
|
printf("\enMatch succeeded again at offset %d\en", ovector[0]);
|
||||||
|
|
||||||
|
/* The match succeeded, but the output vector wasn't big enough. */
|
||||||
|
|
||||||
|
if (rc == 0)
|
||||||
|
{
|
||||||
|
rc = OVECCOUNT/3;
|
||||||
|
printf("ovector only has room for %d captured substrings\en", rc - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* As before, show substrings stored in the output vector by number, and then
|
||||||
|
also any named substrings. */
|
||||||
|
|
||||||
|
for (i = 0; i < rc; i++)
|
||||||
|
{
|
||||||
|
char *substring_start = subject + ovector[2*i];
|
||||||
|
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||||
|
printf("%2d: %.*s\en", i, substring_length, substring_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (namecount <= 0) printf("No named substrings\en"); else
|
||||||
|
{
|
||||||
|
unsigned char *tabptr = name_table;
|
||||||
|
printf("Named substrings\en");
|
||||||
|
for (i = 0; i < namecount; i++)
|
||||||
|
{
|
||||||
|
int n = (tabptr[0] << 8) | tabptr[1];
|
||||||
|
printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
|
||||||
|
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||||
|
tabptr += name_entry_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} /* End of loop to find second and subsequent matches */
|
||||||
|
|
||||||
|
printf("\en");
|
||||||
|
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcredemo.c */
|
||||||
|
.EE
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user