diff -crP php-2.0.1/README.jp php-2.0.1.jp_urat-5.3/README.jp
*** php-2.0.1/README.jp	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/README.jp	Thu Mar 26 03:53:40 1998
***************
*** 0 ****
--- 1,329 ----
+ PHP/FI 2.0.1 日本語+αパッチ 5.3                                  1998.3.26
+ php-2.0.1.jp_urat-5.3.gz
+ md5 checksum: 
+                                           浦栃 裕 <urat@first.tsukuba.ac.jp>
+                                       http://www.first.tsukuba.ac.jp/~urat/
+ 
+ 
+   このパッチは、おみつさんのPHP/FI日本語パッチに、私が知る全てのPHP/FIの
+ 日本語パッチの要素を盛り込んだものです。
+   配布、バグ報告、サポートなどは
+ 
+     Apacheワールド
+       http://www.first.tsukuba.ac.jp/docs/install/apache/1.html
+ 
+     PHPメーリングリスト(日本語)
+       http://sidecar.ics.es.osaka-u.ac.jp/php-jp/
+ 
+     PostgreSQLメーリングリスト(日本語)
+       http://www.sra.co.jp/people/t-ishii/PostgreSQL/
+ 
+   にて行ないます。お気軽にメール下さい。
+ 
+ 
+ 取り込まれたパッチの説明
+ 
+   おみつさんの PHP/FI 2.0b12 日本語パッチ第 5 版に、
+ 
+     [pgsql-jp 2535]
+       http://www.sra.co.jp/people/t-ishii/PostgreSQL/mhonarc/pgsql-jp/1997Sep/msg00108.html
+ 
+     [pgsql-jp 2689]
+       http://www.sra.co.jp/people/t-ishii/PostgreSQL/mhonarc/pgsql-jp/1997Oct/msg00103.html
+ 
+   をあてたものが、第 5 版 fix1 として、そして パスワード認証でPHP/FIから
+ Postgresにログインする関数を付け足すパッチ kanji2.0b12.patch-fix2 が、
+ おみつさんご本人からリリースされました。
+   ファイル名からして、この時点で 日本語パッチ v5.2 であると考えられます。
+ 
+   このパッチは、それからさらに:
+ 
+ ・lex.c の ParserInit() 内での gsize変数 の扱いの修正
+     http://www.sra.co.jp/people/t-ishii/PostgreSQL/mhonarc/pgsql-jp/1998Mar/msg00230.html
+ 
+ ・石井@SRAさんによる、同じくパスワード認証でPHP/FIからPostgresにログイン
+   する関数を付け足すパッチ
+   こちらの方がおみつさんの同様のパッチよりもPHP/FIオリジナルの関数の
+   形に近いので、こちらを採用
+     ftp://ftp.sra.co.jp/pub/cmd/postgres/php-fi/pg_passwd_auth.patch.gz
+ 
+ ・access logging を on にしているときに特に速度が向上するパッチ
+   本家 PHP/FI メーリングリスト(英語)
+     Subject: [PHP] speedup patch for php2
+   ./patch/log_speedup.patch にそのメールがあります
+ 
+ ・fileInode() の引数にファイル名だけでなくファイルディスクリプタも指定
+   できるようにするパッチ
+   本家 PHP/FI メーリングリスト(英語)
+     Subject: [PHP] fileInode($fd) instead of fileInode(filename)?
+   ./patch/fileinode.patch にそのメールがあります
+ 
+   以上の向上がなされています。機能的に 日本語パッチ v5.2 相当のものに、
+ バグフィックスと性能の向上がなされているため、日本語パッチ v5.3 を名乗
+ ることにしました。
+   混乱を避けるために、アーカイブ名に urat という固有名を入れてあります。
+ 
+ 
+ 協力＆さんくす
+   おみつさん
+   小沢＠東光電気さん、
+   PHPメーリングリスト(日本語)のみなさん
+   PostgreSQLメーリングリスト(日本語)のみなさん
+ 
+ 
+ 将来
+   PHP/FIなんて捨てて、PHP3の日本語(国際化)をやらないとあきませんね。
+   ドキュメントの整備。ドキュメントより大事なものはありません。
+ 
+ 
+ おみつさんによるオリジナルの README.kanji.euc
+ 
+ -->8-->8-->8-->8
+ 
+ README.kanji.euc  === PHP/FI 漢字コードハンドリング 第 5 版 ===
+ 
+ PHP/FI 2.0b12 用
+ 
+ ======================================================================
+ INTRO
+ ----------------------------------------------------------------------
+ PHP/FIにて漢字コードの変換を実現するパッケージを組んでみました。
+ ですが、このコードはほぼすべて「ぱくり」です ;-) 
+ 
+ ベースは、
+ 	1. shige@csk.JUNET さんの filters-2.0
+ 	2. 佐藤公彦(K.Sato)さんの qkc 1.0
+ 	3. 谷本孝浩(NBC02362@niftyserve.or.jp)さんの gawk-2.15.4+mb1.03
+ から拝借いたしました。
+ 
+ ======================================================================
+ INSTALL
+ ----------------------------------------------------------------------
+ PHP/FI 2.0b12 のソースのトップディレクトリ(.../php-2.0b12/)で
+ 	% patch -p1 < kanji2.0b12.patch
+ を実行し、あとは PHP/FI のトップディレクトリ
+ (.../php-2.0b12/)にて通常通りの 
+ 	% ./install 
+ とします。
+ 
+ installスクリプトの最後に、
+ 	Do you want to use multi-byte extension for regex library? [y/N] 
+ と訊かれますので、ここで「Y」を入力すると、
+ マルチバイト拡張正規表現が使えるようになります。
+ 
+ # 詳しくは、jp.regex/README.MB を参照して下さい。
+ # ただし、gawk-2.15.4+mb1.03のものをそのまま置いてありますので、
+ # 御了承のほどを...
+ 
+ また、システムにPOSIX正規表現のパッケージがない場合、
+ .../php-2.0b12/config.h の
+ 
+ /* Define if you have the regcomp function.  */
+ #define HAVE_REGCOMP 1
+ 
+ がコメントアウトされていることがあります。
+ その場合マルチバイト拡張正規表現を有効にするため、
+ この行を #define して下さい。
+ (上記のようになっていれば OK です。)
+ 
+ ======================================================================
+ APACHE MODULE
+ ----------------------------------------------------------------------
+ アパッチのモジュールとして実行される場合、上記マルチバイト拡張のlibregex
+ を使用するには、アパッチのコンパイル時に 
+ 	jp.regex/libregex.a
+ 	jp.regex/regex.h
+ の両ファイルが必要となります。
+ 
+ 1. 上記ファイルをアパッチのソースディレクトリにコピーします。
+ 
+ 	% cp -r jp.regex /usr/local/etc/httpd
+ 	( /usr/local/etc/httpd は適当なディレクトリに変更して下さい)
+ 
+ 2. Configuration の中のコンパイルフラッグに jp.regex を加えます。
+    PHP/FI のライブラリとソースも加えます。
+ 
+ 	EXTRA_CFLAGS= -I./jp.regex -DSERVER_SUBVERSION=\"PHP/FI-2.0b12\"
+ 	EXTRA_LIBS= -L. -lphp -lgdbm -L/usr/local/pgsql/lib -lpq -L./jp.regex -lregex -lm
+ 
+ 3. ConfigurationのWANTHSREGEXの値をdefault から no に変更して、
+    配布 regex パッケージのリンクをしないようにします。
+ 
+ 	Rule WANTHSREGEX=default
+                          ↓
+ 	Rule WANTHSREGEX=no
+ 
+ # apache ML の つっしー(m2@soum.co.jp)さん ありがとうございます。
+ 
+ 4. Configuration の最後に PHP/FI の Module を宣言します。
+ 
+ 	Module php_module   mod_php.o
+ 
+ 5. Configure を実行して Makefile をつくります。
+ 
+ % ./Configure
+ 
+ 6. make します。
+ 
+ % make
+ 
+ ======================================================================
+ HOW-TO
+ ----------------------------------------------------------------------
+ このパッケージは、PHP/FI 上で漢字コードを使う際に内部コードを
+  EUC に統一し、その出力コードをセットするものです。
+ 
+ また、新しい関数として、
+ 	MBstrlen(string)
+ 	MBsubstr(string,start,length)
+ の2つを加えてあります。
+ 
+ ----------------------------------------------------------------------
+ 1. SetKanjiOutput(mode)
+ 
+ 関数「SetKanjiOutput("モード")」を指定することで、
+ その出力コードを指定することができます。
+ 
+ [モード]
+   モードは { EUC | JIS | SJIS } の 3種類 です。
+   文字列として指定します。
+ 
+ 
+ Apache サーバーの場合には、
+ .htaccess ファイルの中で
+ 
+     phpKanjiOutput モード
+ 
+ を指定することで、出力コードを統一します。デフォルトでは EUC になります。
+ 
+ これを変更したい場合には、お手数ですが、ソースツリー(.../php-2.0b12/src/)
+ にある kanjiconv.cを手動で変更して下さい。
+ Apache の場合 61行目、それ以外では 63行目の
+ 
+     output_kanji_code=EUC;
+ 
+ の「EUC」の部分を JIS または SJIS に変更して下さい。
+ ( ここでは、文字列にはしないで下さい。
+   php.h の中で #define された int型 になります )
+ 
+ ----------------------------------------------------------------------
+ 2. MBstrlen(string)
+ 
+ string に指定された文字列の「文字数」を返します。
+ 
+ ----------------------------------------------------------------------
+ 3. MBsubstr(string, start, length)
+ 
+ string に指定された文字列から、start で始まり length 文字分の文字列を
+ 取り出します。
+ start は 0 から始まる整数です。
+ 
+ ======================================================================
+ EXAMPLE
+ ----------------------------------------------------------------------
+ 1. SetKanjiOutput(mode)
+ <? 
+   ...
+   $str="おみつ"; /* この時点で内部コードは EUC になります */
+   pg_exec("select * from address where person='$str'");
+   ...
+ 
+   $res=" 検索結果 : $str ";
+ 
+   SetKanjiOutput("JIS"); /* ブラウザに JIS で表示 */
+   echo "<h2> $res </h2>";
+   ...
+ 
+   SetKanjiOutput("SJIS"); /* ファイルへ SJIS で保存 */
+   $fp=fopen("/dos/result.txt","w");
+     fputs($fp,$res);
+   fclose($fp);
+   ...
+ >
+ 
+ ブラウザのみならず、ファイルへの出力も制御できます。
+ デバイスへの出力前に使用して下さい。
+ 
+ デフォルトの出力コードは「EUC」です。
+ 
+ 2. MBstrlen(string)
+ 
+ <?
+   $str = "おみつのMB関数";
+   $len = strlen($str);
+   $mblen = MBstrlen($str);
+ 
+   echo "len = $len, mblen = $mblen\n";
+ >
+ 
+ 結果:
+   len = 14, mblen = 8
+ 
+ 3. MBsubstr(string, start, length)
+ 
+ <?
+   $str = "おみつのMB関数";
+   $mitsu = MBsubstr($str, 0, 3);
+ 
+   echo "mitsu = $mitsu\n";
+ >
+ 
+ 結果:
+   mitsu = おみつ
+ 
+ 
+ ======================================================================
+ MACHINE 
+ ----------------------------------------------------------------------
+ 確認済み動作環境は、
+   Linux 2.0.27 (Slackware 3.1) + JE 0.9.8
+   + PostgreSQL v6.0 + JP-patch
+ 上の
+  apache_1.2.0 + php-2.0b12
+ バージョンです。
+ 
+ ======================================================================
+ CHANGES
+ ----------------------------------------------------------------------
+ -- 第 4 版
+ * 少々のバグフィクス
+   + CGI版 (php.cgi) を作成するときに echo.c でエラーが起こる不具合
+ 
+   + jp.regex/libregex.a スタティックライブラリをリンクしない不具合
+ 
+   + install スクリプトで HAVE_REGCOMP が定義されない(POSIXのライブラリ
+     を持たない)マシンでの不具合
+ 
+ -- 第 3 版
+ * CGI版の Echo 関数が常に EUC を出力する不具合を修正
+ 
+ * Mail 関数に日本語を使用すると、EUCのまま送られる不具合を修正
+ 
+ * MBstrlen, MBsubstr のマルチバイト関数を追加
+ 
+ * 前回流した「KanjiConv(文字列,モード)」は、内部コード
+   を統一したことによって機能しなくなりましたので、
+   削除させて頂きました。
+ 
+  
+ ======================================================================
+ NOTICE
+ ----------------------------------------------------------------------
+ * 「とりあえず動くもの」を大前提として組みましたので
+   なにかしら問題もあるかと思われますが、完全に無保証です。
+   各人の責任において御使用ください。
+ 
+ * APACHE 上でわざわざ regex のパッケージが配布されているということは、
+   何か問題があるのかな? と思いつつ、配布パッケージは使用していません。
+   なにか問題があれば、連絡頂けると幸いです。
+ 
+ ======================================================================
+ TODO
+ ----------------------------------------------------------------------
+ 高速化 mmap() の対応 (いまだに...すみません不勉強で _o_)
+ 
+ ======================================================================
+ 08-19-1997 前田充宏
+ mitsu@tramp.co.jp
+ 
+ -->8-->8-->8-->8
diff -crP php-2.0.1/config.h.in php-2.0.1.jp_urat-5.3/config.h.in
*** php-2.0.1/config.h.in	Tue May 27 07:35:37 1997
--- php-2.0.1.jp_urat-5.3/config.h.in	Wed Feb 18 21:03:39 1998
***************
*** 40,46 ****
  #undef HAVE_STRCASECMP
  
  /* Define if you have the mmap function.  */
! #undef HAVE_MMAP
  
  /* Define if you have the putenv function.  */
  #undef HAVE_PUTENV
--- 40,46 ----
  #undef HAVE_STRCASECMP
  
  /* Define if you have the mmap function.  */
! /* #undef HAVE_MMAP */
  
  /* Define if you have the putenv function.  */
  #undef HAVE_PUTENV
diff -crP php-2.0.1/install php-2.0.1.jp_urat-5.3/install
*** php-2.0.1/install	Wed Nov 19 15:50:15 1997
--- php-2.0.1.jp_urat-5.3/install	Wed Feb 18 21:03:39 1998
***************
*** 714,720 ****
  	PHPSENDMAIL=""
  fi
  
! if grep "#define HAVE_REGCOMP 1" config.h >/dev/null 2>&1
  then
  	echo "Your system appears to have a Posix compliant regex library";
  	echo "On some systems this library is broken.  UnixWare 2.0.x is an"
--- 714,741 ----
  	PHPSENDMAIL=""
  fi
  
! $ECHO_N "Do you want to use multi-byte extension for regex library? [y/N] "
! read a
! if [ "$a" = "y" -o "$a" = Y ]
! then
! 	echo "Using multi-byte extension for regular expression library"
! 	echo ""
! 	LIBREGEX=-lregex
! 	LIBREGEXDIR=-L./jp.regex
! 	REGEX_INCLUDE=-I./jp.regex
! 	OURREGEX=jp.regex/libregex.a
! 	REGEXMSG1="For Apache 1.1.1 you must copy src/jp.regex/libregex.a and src/jp.regex/regex.h to $APACHE_DIR and add -I. and -llibregex.a to Configuration"
! 	REGEXMSG2="For Apache 1.2 or higher you must set the WANTHSREGEX Configuration parameter to N and copy src/jp.regex/libregex.a and src/jp.regex/regex.h to $APACHE_DIR then add -I. and -lregex to Configuration"
! 	STATICLIBREGEX=regex/libregex.a
! 	echo "#define MB 1" >> config.h
! 
! 	# The bundled jp.regex library is used here, so HAVE_REGCOMP must be on even if configure left it undefined.
! 	if grep "#define HAVE_REGCOMP 1" config.h >/dev/null 2>&1; then :; else
! 		sed "s/^.*HAVE_REGCOMP.*$/#define HAVE_REGCOMP 1/" < config.h >config.$$
! 		rm -f config.h
! 		mv config.$$ config.h
! 	fi
! elif grep "#define HAVE_REGCOMP 1" config.h >/dev/null 2>&1
  then
  	echo "Your system appears to have a Posix compliant regex library";
  	echo "On some systems this library is broken.  UnixWare 2.0.x is an"
diff -crP php-2.0.1/patch/fileinode.patch php-2.0.1.jp_urat-5.3/patch/fileinode.patch
*** php-2.0.1/patch/fileinode.patch	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/patch/fileinode.patch	Thu Mar 26 03:28:23 1998
***************
*** 0 ****
--- 1,111 ----
+ From php-list-return-5475-urat=first.tsukuba.ac.jp@php.iquest.net  Sat Feb  7 08:17:52 1998
+ Return-Path: php-list-return-5475-urat=first.tsukuba.ac.jp@php.iquest.net
+ Received: from iquest3.iquest.net (iquest3.iquest.net [209.43.20.203]) by daichi.first.tsukuba.ac.jp (8.8.8/3.4W396040220) with SMTP id IAA16086 for <urat@first.tsukuba.ac.jp>; Sat, 7 Feb 1998 08:17:51 +0900 (JST)
+ Received: (qmail 25490 invoked by uid 54979); 6 Feb 1998 20:46:46 -0000
+ Mailing-List: contact php-list-help@php.iquest.net; run by ezmlm
+ Delivered-To: mailing list php-list@php.iquest.net
+ Received: (qmail 25451 invoked from network); 6 Feb 1998 20:46:44 -0000
+ Received: from mail.esc.de (HELO esc.de) (194.115.54.34)
+   by iquest3.iquest.net with SMTP; 6 Feb 1998 20:46:44 -0000
+ Received: from workaholics.net (guivol@escpc23.esc.de [194.115.54.153])
+ 	by esc.de (8.8.5/8.8.5) with ESMTP id VAA17307
+ 	for <php-list@php.iquest.net>; Fri, 6 Feb 1998 21:46:44 +0100
+ Sender: guivol@esc.de
+ Message-ID: <34DB7705.489FBD27@workaholics.net>
+ Date: Fri, 06 Feb 1998 21:48:06 +0100
+ From: Guido Vollbeding <guivol@workaholics.net>
+ Organization: Independent Workaholics Network
+ X-Mailer: Mozilla 4.04 [en] (X11; I; Linux 2.0.33 i586)
+ MIME-Version: 1.0
+ To: php-list@php.iquest.net
+ Subject: [PHP] fileInode($fd) instead of fileInode(filename)?
+ Content-Type: text/plain; charset=iso-8859-1
+ Content-Transfer-Encoding: 8bit
+ Status: RO
+ 
+ Hi folks,
+ 
+ for a Digital Image Archive project based on a MySQL database with phtml
+ interface I need the PHP fileInode() function with a file descriptor
+ argument rather than the filename.
+ 
+ Fortunately, it was easy to patch the php source for the desired feature
+ and it seems to work properly.
+ I use PHP 2.0.1 as module with Apache 1.2.5.
+ 
+ The simple idea is to check the argument type (LNUMBER or STRING) in the
+ function implementation and call the standard library fstat() function
+ instead of stat() if the argument is of type LNUMBER instead of STRING.
+ Here is the patch:
+ 
+ escpc23:/www/php/src # diff -C 2 -p file.c.orig file.c
+ *** file.c.orig Fri Feb  6 14:10:24 1998
+ --- file.c      Fri Feb  6 14:21:12 1998
+ *************** void FileFunc(int type) {
+ *** 608,611 ****
+ --- 608,617 ----
+                 return;
+         }
+ + if (s->type == LNUMBER) {
+ +       if (fstat((int)s->intval,&sb) == -1) {
+ +               Push("-1",LNUMBER);
+ +               return;
+ +       }
+ + } else {
+   #if APACHE
+       if(!CurrentStatFile) {
+ *************** void FileFunc(int type) {
+ *** 632,635 ****
+ --- 638,642 ----
+                 }
+         }
+ + }
+         switch(type) {
+         case 0: /* fileperms */
+ 
+ Would it be reasonable to include this feature in future php releases?
+ 
+ Here is the background for my need:
+ In order to achieve a flexible image data handling and access, I use a
+ modified filesystem (under Linux) *without* directories and filenames.
+ Instead, image files are accessed directly via the inode number. The
+ inode number is then stored in the corresponding database record for
+ reference.
+ 
+ The image files can then simply be read through ordinary system functions
+ by giving the inode number as string as the "filename" argument in the
+ open-call.
+ In php it looks like:
+   $oresult = GetImageSize("$DOCUMENT_ROOT/imgo/$oino.jpg");
+   <img src="<?echo "/imgo/$oino.jpg">" <?echo $oresult[3]>>
+ The "imgo" directory is the mount point for my modified file-system,
+ which simply converts the filename argument (like atol()) to get the
+ inode number for open (no dir search).
+ So far it works well without further modifications.
+ 
+ However, when creating the file (via http file upload), I need a way
+ to fetch the generated inode number information. Since I don't have
+ a 'valid' filename at this point, I do this by creating an empty file
+ first and ask the inode number via the file descriptor. Afterwards,
+ I can rewrite the actual file as usual.
+ 
+ $fd = fopen("$DOCUMENT_ROOT/imgo/a","w");
+ $oino = fileInode($fd);
+ fclose($fd);
+ exec("cp $filename $DOCUMENT_ROOT/imgo/$oino.jpg");
+ 
+ (Note: "a" is an arbitrary 'dummy' filename which is not used further
+ inside the modified filesystem. The only reference for later access is
+ the generated inode number, stored in the database instead of a dir.)
+ 
+ This scheme seems to work well in the desired fashion,
+ with minimal modifications.
+ 
+ Regards,
+ Guido
+ 
+ ______________________________PHP/FI Mailing List______________________________
+ To unsubscribe send an empty message to php-list-unsubscribe@php.iquest.net
+ To unsub you@host.com, use: php-list-unsubscribe-you=host.com@php.iquest.net
+ For help: php-list-help@php.iquest.net  Archive: http://www.tryc.on.ca/php.html
+ 
diff -crP php-2.0.1/patch/log_speedup.patch php-2.0.1.jp_urat-5.3/patch/log_speedup.patch
*** php-2.0.1/patch/log_speedup.patch	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/patch/log_speedup.patch	Thu Mar 26 03:28:15 1998
***************
*** 0 ****
--- 1,77 ----
+ From php-list-return-5424-urat=first.tsukuba.ac.jp@php.iquest.net  Thu Feb  5 06:31:57 1998
+ Return-Path: php-list-return-5424-urat=first.tsukuba.ac.jp@php.iquest.net
+ Received: from iquest3.iquest.net (iquest3.iquest.net [209.43.20.203]) by daichi.first.tsukuba.ac.jp (8.8.8/3.4W396040220) with SMTP id GAA07988 for <urat@first.tsukuba.ac.jp>; Thu, 5 Feb 1998 06:31:56 +0900 (JST)
+ Received: (qmail 16597 invoked by uid 54979); 4 Feb 1998 21:30:25 -0000
+ Mailing-List: contact php-list-help@php.iquest.net; run by ezmlm
+ Delivered-To: mailing list php-list@php.iquest.net
+ Received: (qmail 16574 invoked from network); 4 Feb 1998 21:30:24 -0000
+ Received: from ns.viet.net (207.201.22.241)
+   by iquest3.iquest.net with SMTP; 4 Feb 1998 21:30:24 -0000
+ Received: from localhost (tin@localhost) by ns.viet.net (8.8.8/8.8.8) with SMTP id NAA29317 for <php-list@php.iquest.net>; Wed, 4 Feb 1998 13:30:08 -0800
+ Date: Wed, 4 Feb 1998 13:30:08 -0800 (PST)
+ From: Tin Le <tin@netimages.com>
+ X-Sender: tin@ns.viet.net
+ To: php-list@php.iquest.net
+ Subject: [PHP] speedup patch for php2
+ Message-ID: <Pine.LNX.3.96.980204131811.23588K-100000@ns.viet.net>
+ MIME-Version: 1.0
+ Content-Type: TEXT/PLAIN; charset=US-ASCII
+ Status: RO
+ 
+ For those who are still using PHP2 _and_ especially if you have logging
+ turned on for most of your web pages, here is a speed up patch that should
+ help gain you some extra performance.  It works for me on a small site that
+ gets around 30K hits a day. I'll try it out on a larger site next.
+ 
+ Background, I was looking into a problem I was having with Apache 1.2.5 and
+ both PHP2 and PHP3 compiled in as modules, when I noticed the minor speedup
+ potential in log.c.  Essentially it's calling _RegReplace() to change all
+ '/' into '_'.  I figured since that is a special case, no regexs or
+ anything else to worry about, it's safe to do this inline and save the
+ overhead of a function call, plus _RegReplace has all the other overheads.
+ I put #ifdef around the call to _RegReplace() in case anyone ever wants to
+ put that back.
+ 
+ To apply this patch, cd to the src directory of PHP/FI 2.
+ 
+ ------------php2.cdif----------------------------------
+ *** log.c        Wed Feb  4 12:57:37 1998
+ --- log.c.new    Sun Feb  1 23:01:01 1998
+ ***************
+ *** 120,125 ****
+ --- 120,126 ----
+   
+   char *filename_to_logfn(char *filename) {
+          char *lfn, *lp, *ret;
+ + int i;
+   
+          if (forcelogfile) { filename = forcelogfile; }
+          lfn = estrdup(1,filename);
+ ***************
+ *** 132,138 ****
+ --- 133,144 ----
+                          lp++;
+                  }
+          }
+ + #if 0
+          lp = _RegReplace("/","_",lp);
+ + #else
+ +        for (i=0, lfn=lp; *lfn && i<PATH_MAX; lfn++, i++)
+ +                if (*lfn == '/') *lfn = '_';
+ + #endif
+          ret = estrdup(1,lp);
+          return(ret);
+   }
+ 
+ Tin Le
+ 
+ ----
+ Net Images - Premier Web Presence Provider         http://www.netimages.com
+ Tin Le - tin@netimages.com
+ 
+ 
+ ______________________________PHP/FI Mailing List______________________________
+ To unsubscribe send an empty message to php-list-unsubscribe@php.iquest.net
+ To unsub you@host.com, use: php-list-unsubscribe-you=host.com@php.iquest.net
+ For help: php-list-help@php.iquest.net  Archive: http://www.tryc.on.ca/php.html
+ 
diff -crP php-2.0.1/src/Makefile.in php-2.0.1.jp_urat-5.3/src/Makefile.in
*** php-2.0.1/src/Makefile.in	Sat Nov 15 16:45:42 1997
--- php-2.0.1.jp_urat-5.3/src/Makefile.in	Wed Feb 18 21:03:39 1998
***************
*** 130,142 ****
  	  type.o conf.o acc.o local.o dns.o log.o sort.o dir.o rand.o \
  	  gd.o mime.o fsock.o microtime.o pg95.o pool.o \
  	  uniqid.o soundex.o syslog.o mysql.o solid.o mail.o image.o md5.o \
! 	  snmp.o oracle.o filepro.o illustra.o odbc.o adabasd.o
  
  all: $(PHP_PROGS) @MODULE_TARGET@
  
  regex/libregex.a:
  	cd regex; make lib CC=$(CC) AUX_CFLAGS='$(CFLAGS)' RANLIB=$(RANLIB)
  
  .c.o:
  	$(CC) $(CFLAGS) $(SYSTEM) $(NSAPI_DEFS) $(NSAPI_INCLUDE) $(CPPFLAGS) -c $<
  
--- 130,146 ----
  	  type.o conf.o acc.o local.o dns.o log.o sort.o dir.o rand.o \
  	  gd.o mime.o fsock.o microtime.o pg95.o pool.o \
  	  uniqid.o soundex.o syslog.o mysql.o solid.o mail.o image.o md5.o \
! 	  snmp.o oracle.o filepro.o illustra.o odbc.o adabasd.o \
! 	  kanjiconv.o mb.o
  
  all: $(PHP_PROGS) @MODULE_TARGET@
  
  regex/libregex.a:
  	cd regex; make lib CC=$(CC) AUX_CFLAGS='$(CFLAGS)' RANLIB=$(RANLIB)
  
+ jp.regex/libregex.a:
+ 	cd jp.regex; make lib CC=$(CC) AUX_CFLAGS='$(CFLAGS)' RANLIB=$(RANLIB)
+ 
  .c.o:
  	$(CC) $(CFLAGS) $(SYSTEM) $(NSAPI_DEFS) $(NSAPI_INCLUDE) $(CPPFLAGS) -c $<
  
***************
*** 218,224 ****
  	rm -f Makefile ../config.status ../config.cache ../config.log 
  
  clean:
! 	rm -f php.cgi libphp.a *.o core *.so.* regex/*.a regex/*.o
  
  tags:
  	etags ${srcdir}/*.c ${srcdir}/*.h ${srcdir}/[a-z]*.in
--- 222,228 ----
  	rm -f Makefile ../config.status ../config.cache ../config.log 
  
  clean:
! 	rm -f php.cgi libphp.a *.o core *.so.* regex/*.a regex/*.o jp.regex/*.a jp.regex/*.o
  
  tags:
  	etags ${srcdir}/*.c ${srcdir}/*.h ${srcdir}/[a-z]*.in
***************
*** 300,305 ****
--- 304,310 ----
  image.o: image.c php.h parse.h
  syslog.o: syslog.c php.h parse.h
  oracle.o: oracle.c oracle.h php.h parse.h
+ kanjiconv.o: kanjiconv.c php.h parse.h
  illustra.o: illustra.c php.h parse.h
  odbc.o:	odbc.c php.h parse.h
  install: all 
diff -crP php-2.0.1/src/echo.c php-2.0.1.jp_urat-5.3/src/echo.c
*** php-2.0.1/src/echo.c	Sun Nov 23 14:44:38 1997
--- php-2.0.1.jp_urat-5.3/src/echo.c	Wed Feb 18 21:03:39 1998
***************
*** 31,39 ****
  void Echo(char *format, int args) {
  	Stack *s=NULL;
  	Stack sarg[5]; /* Max 5 args to keep things simple in the parser */
- #if APACHE
  	char *buf;
- #endif
  	int num=args, done=0, type;
  	char *t,*st,*beg,*fmt;
  
--- 31,37 ----
***************
*** 112,124 ****
  			StripSlashes(fmt);
  			ParseEscapes(sarg[num].strval);
  			StripSlashes(sarg[num].strval);
- #if APACHE
  			buf = emalloc(1,strlen(fmt)+strlen(sarg[num].strval)+ECHO_BUF);
  			sprintf(buf,fmt,sarg[num].strval);
  			if(PUTS(buf)<0) Exit(0);
- #else
- 			if(printf(fmt,sarg[num].strval) < 0) Exit(0);
- #endif
  			num++;
  			break;
  		}
--- 110,118 ----
diff -crP php-2.0.1/src/file.c php-2.0.1.jp_urat-5.3/src/file.c
*** php-2.0.1/src/file.c	Wed Nov 26 06:40:32 1997
--- php-2.0.1.jp_urat-5.3/src/file.c	Thu Mar 26 03:13:21 1998
***************
*** 607,612 ****
--- 607,618 ----
  		}
  		return;
  	}
+ if (s->type == LNUMBER) {
+     if (fstat((int)s->intval,&sb) == -1) {
+             Push("-1",LNUMBER);
+             return;
+     }
+ } else {
  #if APACHE
      if(!CurrentStatFile) {
          CurrentStatFile = estrdup(0,php_rqst->filename);
***************
*** 631,636 ****
--- 637,643 ----
  			return;
  		}
  	}
+ }
  	switch(type) {
  	case 0: /* fileperms */
  		sprintf(temp,"%ld",(long)sb.st_mode);
***************
*** 1255,1260 ****
--- 1262,1268 ----
  		Push("",STRING);
  		return;
  	}
+ 	(void)conv2euc(buf,len);
  	Push((buf=AddSlashes(buf,1)),STRING);
  }
  
***************
*** 1291,1296 ****
--- 1299,1305 ----
  		Push("",STRING);
  		return;
  	}
+ 	(void)conv2euc(buf,len);
  	rbuf=estrdup(1,buf);
  	c = *buf;
  	lc=(char)0;
***************
*** 1389,1395 ****
  	}
  	ParseEscapes(buf);
  	StripSlashes(buf);
! 	ret = fputs(buf,fp);
  	sprintf(temp,"%d",ret);
  	Push(temp,STRING);
  }	
--- 1398,1404 ----
  	}
  	ParseEscapes(buf);
  	StripSlashes(buf);
! 	ret = kanji_fputs(buf,fp);
  	sprintf(temp,"%d",ret);
  	Push(temp,STRING);
  }	
diff -crP php-2.0.1/src/jp.regex/Makefile php-2.0.1.jp_urat-5.3/src/jp.regex/Makefile
*** php-2.0.1/src/jp.regex/Makefile	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/Makefile	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,18 ----
+ SHELL = /bin/sh
+ 
+ CFLAGS=-I. $(AUX_CFLAGS) 
+ LFLAGS=
+ LIBS=
+ OBJS=dfa.o mbc.o regex.o
+ SRCS=dfa.c mbc.c regex.c
+ 
+ .c.o:
+ 	$(CC) -c $(CFLAGS) $<
+ 
+ lib: $(OBJS)
+ 	rm -f libregex.a
+ 	ar crv libregex.a $(OBJS)
+ 	$(RANLIB) libregex.a
+ 
+ clean:
+ 	rm -f *.o libregex.a
diff -crP php-2.0.1/src/jp.regex/README.MB php-2.0.1.jp_urat-5.3/src/jp.regex/README.MB
*** php-2.0.1/src/jp.regex/README.MB	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/README.MB	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,369 ----
+ ●● Gnu Awk (gawk) 2.15, patchlevel 4 + multi-byte extension 1.03 ●●
+ ●●                                          Aug. 29, 1994 by t^2 ●●
+ 
+     gawk-2.15.4+mb1.03 -- マルチバイト文字対応版 Gnu Awk
+ 
+ ●概要
+ 
+     GNU プロジェクトによる awk (以下 gawk) をマルチバイト文字対応化した
+     ものです.
+ 
+ ●使用法
+ 
+     gawk からの拡張部分だけを説明します.
+ 
+     増えたオプションは以下の通りです.
+ 
+         -Wctype=ASCII または --ctype=ASCII
+             マルチバイト文字を考慮しません. このオプションを使用した場合, 
+             オリジナルの gawk を単に８ビットクリーンとした動作となります.
+ 
+         -Wctype=EUC または --ctype=EUC
+             マルチバイト文字として EUC を認識します.
+ 
+         -Wctype=SJIS または --ctype=SJIS
+             マルチバイト文字として Shift-JIS を認識します.
+ 
+         MS-DOS 以外のシステムで, Makefile(.in)? を書き換えずにインストー
+         ルした場合, デフォルトでは EUC を認識します.  MS-DOS ではデフォ
+         ルトで Shift-JIS を認識します.
+ 
+     以下, 組み込み関数などの変更点を挙げます.
+ 
+         substr()    マルチバイト文字を分断させるような指定は自動的に修正
+                     します.  例えば,
+                             substr("aあiいuうeえoお", 3, 4)
+                     は,
+                             substr("aあiいuうeえoお", 2, 3) ==> "あi"
+                     という結果になります.  どちらの引数も小さくなる方向
+                     へ丸めます.
+ 
+         jindex()    これは, 以前からよくある index() の変形です.
+                     index() の結果をバイト位置ではなく文字位置で返します.
+ 
+         jlength()   同じく length() の変形です.  バイト数ではなく文字数
+                     を返します.
+ 
+         jsubstr()   substr() の変形です.  バイト位置, バイト数の指定の代
+                     わりに文字位置, 文字数を指定します.
+ 
+         RS          マルチバイト文字を使用することができます.
+ 
+         その他      ・識別子にマルチバイト文字を使用できます.
+ 
+                     ・パス名に含まれるマルチバイト文字については全く考
+                       慮していません.  注意してください.
+ 
+                     ・文字列の大小比較は, 正規表現中の文字クラスの範囲指
+                       定と同様,
+ 
+                           １バイト ASCII 文字 < 半角カナ文字 < 全角文字
+ 
+                       という関係に基づいて比較します.
+ 
+ ● MS-DOS 版実行形式を含むアーカイブについて (それ以外の形態で入手された
+    方は無視してください)
+ 
+     1. アーカイブに含まれているファイル
+ 
+        オリジナルから全く手を加えていないファイル
+ 
+            ACKNOWLE     オリジナルのソースに含まれている ACKNOWLEDGMENT
+            COPYING      オリジナルのソースに含まれている COPYING
+            FUTURES      オリジナルのソースに含まれている FUTURES
+            LIMITATI     オリジナルのソースに含まれている LIMITATIONS
+            NEWS         オリジナルのソースに含まれている NEWS
+            POSIX        オリジナルのソースに含まれている POSIX
+            PROBLEMS     オリジナルのソースに含まれている PROBLEMS
+            README       オリジナルのソースに含まれている README
+ 
+        gawk+mb 用のファイル
+ 
+            CHANGELO.MB  gawk+mb の変更履歴
+            README.MB    このファイル
+ 
+        MS-DOS 版 gawk+mb 用のファイル
+ 
+            GAWK.CAT     オリジナルのソースに含まれているマニュアルページ
+                         gawk.1 を GNU roff でフォーマットしたもの.
+            GAWK.EXE     MS-DOS 版 gawk-2.15.4+mb1.03 の実行形式
+            READMAN.SED  sed を持っている人へおまけ
+                         (sed -f readman.sed gawk.cat)
+ 
+     2. GAWK.EXE について
+ 
+         gawk-2.15.4+mb1.03 を MS-C 6.00A でコンパイルしたものです.
+ 
+         デフォルトで Shift-JIS 漢字コードを含むプログラムやファイルを処
+         理できます.
+ 
+         setargv.obj を組み込んでありますので, MS-DOS でポピュラーなタイ
+         プのワイルドカードが使用できます.  UNIX の csh ライクなワイルド
+         カード展開ルーチンを用意しようかとも思ったのですが, MS-DOS の他
+         のコマンドとの整合性が取れないし, オリジナルをなるべく尊重したかっ
+         たので断念しました.
+ 
+     3. コマンドライン引数について
+ 
+         前述したとおり MS-C の setargv.obj をリンクしていますので, その
+         仕様に従わなければなりません.
+ 
+         １つ１つの引数は空白で区切ります.  引数に空白, ", \, <, >, | を
+         含むときはクォーティングが必要です.  その方法は COMMAND.COM のバ
+         グ臭い仕様と, さらに setargv.obj にも問題があり, かなり難しいの
+         でここでは説明を省きます.  各自研究してください.  一番簡単なのは,
+         gawk のプログラムをファイルにして
+ 
+             gawk -f ファイル名
+ 
+         とすることです.
+ 
+     4. 環境
+ 
+         AWKPATH がセットされていれば, その値をディレクトリのリストとみな
+         し, -f で指定されたファイルをそのディレクトリから探します.  PATH 
+         などと同様に, それぞれのディレクトリはセミコロン (;) で区切りま
+         す.
+ 
+     5. マニュアル
+ 
+         roff 系のフォーマッタを使えない人のために GNU roff でフォーマッ
+         ト済みのマニュアルを用意しました.  ボールドフェース, アンダーラ
+         イン対応の less などでお読みください.  エディタなどでは ^H が入っ
+         ていて読みにくいと思います.
+ 
+             s/.^H//g
+ 
+         という sed のプログラムに通せば, 通常のテキストファイルが得られ
+         ます.  (^H というのはコントロールコードを直接埋めこむという意味
+         です.)
+ 
+ ●インストール (MS-DOS 以外)
+ 
+     デフォルトのマルチバイト文字の設定は, Makefile.in の中で指定します. 
+     デフォルトを Shift-JIS とする場合と, デフォルトでマルチバイト文字を
+     使用しない場合は Makefile.in の MBCTYPE_DEF マクロの定義をそれぞれ以
+     下のように変えてください.
+ 
+         MBCTYPE_DEF = -DSJIS            (デフォルトで Shift-JIS の場合)
+         MBCTYPE_DEF =                   (デフォルトで使用しない場合)
+ 
+     いずれの場合でも起動時のオプションによりマルチバイト文字コードの選択
+     が可能です.
+ 
+     memmove 関数がライブラリにないシステムではリンク時にエラーが出ること
+     があります.  そのときは config/* の対応するファイルに
+ 
+            MEMMOVE_MISSING 1
+ 
+     という行を追加して configure をやり直してください.
+ 
+     その他の作業は, オリジナルの gawk と同様です.
+ 
+ ●インストール (MS-DOS 版.  ここでいうインストールとは, ソースからのイン
+   ストールのことです)
+ 
+     まず, pc/* を, このディレクトリにコピーしてください.
+ 
+         A>copy pc\*.* .
+ 
+     MS-C 6.00A を使用して, デフォルトで Shift-JIS を認識する gawk を作成
+     する場合は, README.MSC に目を通して, 必要ならライブラリにパッチを当
+     て,
+ 
+         A>nmake -f Makefile.msc DEFS=
+ 
+     とするだけでＯＫです. オプティマイズは -Ox まで大丈夫なようです. コ
+     ンパイルが無事終了したら,
+ 
+         A>nmake -f Makefile.msc test
+ 
+     と入力して, 動作チェックをしてみてください.
+ 
+     その他の処理系を使用する場合や, デフォルトを Shift-JIS 以外にする場
+     合は Makefile.msc を書き換えて使用してください.
+ 
+     また, 今回から GO32 をサポートしています.
+ 
+         A>copy pc\*.* .
+         A>del config.h
+         A>ren config.go3 config.h
+         A>make -f Makefile.go3
+ 
+     とすればコンパイルされるはずです. DJGPP 1.11 でのみ確認済みです. た
+     だし, DJGPP 版 GNU make を使用した場合,
+ 
+         A>make -f Makefile.go3 test
+ 
+     が何故かこけます. (単に私の設定が悪いだけかも知れませんが.) そのとき
+     は test/Makefile.dos のこけた部分を見て, 手動でやってみてください.
+ 
+     なお, この動作チェックを行う場合は, rm, wc などのツールが必要です. 
+     持っていない方は test/Makefile.dos を書き換えるなり, rm, wc, etc. を
+     作成するなりしてしのいでください. redir という見慣れないプログラムも
+     使用していますが, これは, 標準エラー出力をリダイレクトしたりするため
+     に即興で作ったプログラムです. これのソースはおまけとして pc/ ディレ
+     クトリに入れておきました. しょうもないプログラムですので決してコード
+     を覗き見したりしないように(笑). 簡易マニュアルを pc/redir.txt に書い
+     ています.
+ 
+ ●バグ
+ 
+     1. いわゆる JIS には対応していません.  将来対応する予定もありません.
+ 
+     2. マルチバイト文字コードはあまり厳格には考えていません.
+ 
+         EUC       １バイト目 ... 0x80 - 0xff
+         EUC       ２バイト目 ... 0x01 - 0xff (0x0a を除く)
+ 
+         Shift-JIS １バイト目 ... 0x80 - 0x9f, 0xe0 - 0xff
+         Shift-JIS ２バイト目 ... 0x01 - 0xff (0x0a を除く)
+ 
+        として処理しています.  半角カナも使えるはずです.  EUC の SS3
+        (0x8f) に始まる３バイトコードは使えません.  (私はこれをサポートし
+        ているシステムを見たことがない...)
+ 
+ ●アルゴリズム (dfa.[ch] のマルチバイト文字対応化)
+ 
+     以前は漠然と, DFA を直接 EUC や Shift-JIS のような文字種の多いコード
+     セットに対応させるのは, 非常に難しいと思っていました.  ところがある
+     日, 自作ライブラリのテスト用に, 正規表現を DFA へ変換する簡単なプロ
+     グラムを書いたときに, 突然うまいアイディアが閃いたのです.  マルチバ
+     イト文字といえども結局はバイトの並びです.  マルチバイト文字を, すべ
+     てバイト単位に分解して, 正規表現を作ってしまえばよかったのです.
+ 
+     言葉ではうまく表現できないので, 以下の記号を使用し, どういうふうにバ
+     イト単位に分解しているのか, 例を挙げます.
+ 
+             a, b, c ... シングルバイト文字.
+             x, y, z ... マルチバイト文字の１文字目.
+ 
+         . (任意の１文字)
+           ==> [a-c]|[x-z][a-z]
+ 
+           (シングルバイト文字か, またはマルチバイト文字の１文字目と
+           任意の１文字の連接.)
+ 
+         [xb-zx] (xb から zx の範囲のマルチバイト文字)
+           ==> x[b-z]|y[a-z]|z[a-x]
+ 
+         yb*
+           ==> (yb)*
+ 
+     実際には正規表現を作り出すのではなく, 正規表現を分解したトークンを直
+     接生成しています.  この辺, 興味がある方はソースを見たほうが早いと思
+     います.  (あまりエレガントではありませんのでソースをじっくり見られる
+     のは恥ずかしい気もしますが...)
+ 
+     これだけでは, 例えばあるテキストから xy という文字を探そうとすると,
+     xxyy のような文字の並びにまで反応してしまいます.  そこで, マルチバイ
+     トモードのときには必ず "^.*(" + ユーザパターン + ")" として処理しま
+     す.  '.*' により, '.' はマルチバイト文字の一部にはマッチしませんから, 
+     頭出しできるわけです.
+ 
+ ● dfa.[ch], regex.[ch] の拡張仕様 (他のアプリケーションへ応用したい方へ)
+ 
+     dfa.[ch], regex.[ch] モジュールは mbc.[ch] モジュールに依存していま
+     す.  また, これはオリジナルの仕様ですが, dfa.[ch] を使用する場合は 
+     regex.h の定義が必要です.
+ 
+     マルチバイト文字のタイプは, mbc.[ch] の mbcinit() で設定します.
+     mbc.h に定義されているマクロ MBCTYPE_ASCII, MBCTYPE_EUC,
+     MBCTYPE_SJIS のいずれかを mbcinit() に渡してください.
+ 
+     dfa.[ch] は, パターンのコンパイル時にだけ, この mbc.[ch] の設定を参
+     照します.  パターンマッチングの際は, コンパイル時に設定されていた, 
+     マルチバイト文字のタイプを検索します.
+ 
+     一方, regex.[ch] は, パターンコンパイル時, マッチング時の両方で 
+     mbc.[ch] の設定を参照します.  が, この両者で mbc.[ch] の設定を変更す
+     ることはできません.  つまり, Shift-JIS で記述されたパターンを, EUC 
+     テキストから検索するといった動作はできません.  注意してください.
+ 
+     マルチバイト文字対応に伴って注意すべき正規表現を以下に記します.
+ 
+         .       任意の１バイト文字, または正当なマルチバイト文字にマッチします. 
+                 「正当なマルチバイト文字」とは, マルチバイト文字の１バイト
+                 目に, '\0' または '\n' 以外のバイトが続く文字のことです.
+ 
+         [x-y]   文字コード (内部表現) が x から y の範囲にある任意の１文
+                 字にマッチします.  これも . と同じく, 正当でない文字には
+                 マッチしません.
+ 
+         [^x-y]  文字コード (内部表現) が x から y の範囲にない任意の１文
+                 字にマッチします.  正当でない文字にもマッチします.
+ 
+     マルチバイト文字の内部表現は単に１バイト目を上位バイト, ２バイト目を
+     下位バイトとした１６ビット符号なし整数です.  Shift-JIS でも EUC でも
+ 
+         １バイト ASCII 文字 < 半角カナ文字 < 全角文字
+ 
+     という大小関係が成り立っています.
+ 
+ ●条件など
+ 
+     1. オリジナルの GNU awk の著作権は Free Software Foundation, Inc. が
+        有しています. パッチ部分 (gawk-mb.diff) の著作権は私 (t^2) が有し
+        ています.
+ 
+     2. GNU awk のソースコードは各所の ftp サイト, もしくは Nifty-serve 
+        の FUNIX のデータライブラリから入手可能です. GNU awk から gawk+mb 
+        への差分 gawk-mb.diff は, 私が FUNIX へ登録し, 堂園和郎氏 
+        (dohzono@sdsft.kme.mei.co.jp) が fj.sources へポストしてくださっ
+        ています.
+ 
+     3. 差分 gawk-mb.diff の再配布は自由です. これに関しては FSF の規定に
+        従う必要もありません. しかし差分を適用した結果のソースコード, お
+        よび実行形式での再配布の際は GNU GENERAL PUBLIC LICENSE (COPYING 
+        参照) に従ってください.
+ 
+        gawk+mb に何らかの改変を加えたものを再配布する際も, GNU GENERAL
+        PUBLIC LICENSE に従うように注意してください. また gawk+mb に含ま
+        れるコード (dfa.[ch] や regex.[ch] など) を利用したプログラムを配
+        布する際も GNU GENERAL PUBLIC LICENSE の該当部分に従ってください.
+ 
+        また義務ではありませんが再配布される方は事後にでも連絡をください. 
+        そして可能な限り, 新しいバージョンへのアップデートに努め, 利用者
+        からの連絡が私に届くように配慮してください.
+ 
+     4. このプログラムは無保証です.
+ 
+     5. gawk+mb に何らかの不具合が発生した場合, (FSF や, オリジナルの作者
+        ではなく) 私に連絡してください. 配布した人が希望している場合は, 
+        その人に連絡してください.
+ 
+     6. ご質問/ご要望/お叱り, その他も大歓迎です. できるかぎりサポートし
+        ます.
+ 
+ ●謝辞
+ 
+     原作者および FSF に感謝します.
+ 
+     このドキュメント作成に関して多くの助言をくださった堂園和郎氏 
+     <dohzono@sdsft.kme.mei.co.jp> に感謝します. また, fj.sources での配
+     布にもご尽力頂いています.
+ 
+     これまで転載/バグ報告をくださった方々に感謝します. 実名を挙げさせて
+     頂きたかったのですがハードディスクのトラブルでほとんどのメールを消失
+     させてしまいました. (バグ報告分に関しては ChangeLog.MB に残っていま
+     す.)
+ 
+     最後に, 貴重なディスクスペースを gawk+mb のために割いてご使用頂いて
+     いるすべての利用者の方々に感謝いたします.
+ 
+ ●「私」の連絡先
+ 
+     〒810  福岡市中央区梅光園団地 7-207               (注: 転居しました)
+     TEL/FAX: 092-731-4025 (TEL/FAX 自動切替え)
+              092-724-6342 (TEL のみ)
+     E-mail: NBC02362@niftyserve.or.jp                           谷本孝浩
+ 
+ # Local variables:
+ # mode: indented-text
+ # indent-tabs-mode: nil
+ # tab-stop-list: (4 8 16 24 32 40 48 56 64 72 80)
+ # left-margin: 4
+ # fill-column: 72
+ # fill-prefix: "    "
+ # version-control: never
+ # End:
diff -crP php-2.0.1/src/jp.regex/dfa.c php-2.0.1.jp_urat-5.3/src/jp.regex/dfa.c
*** php-2.0.1/src/jp.regex/dfa.c	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/dfa.c	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,2865 ----
+ /* dfa.c - deterministic extended regexp routines for GNU
+    Copyright (C) 1988 Free Software Foundation, Inc.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+ 
+ /* Written June, 1988 by Mike Haertel
+    Modified July, 1988 by Arthur David Olson to assist BMG speedups  */
+ /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
+    Last change: Aug. 28, 1994 by t^2  */
+ 
+ #include <assert.h>
+ #include <ctype.h>
+ #include <stdio.h>
+ 
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif
+ 
+ #ifdef STDC_HEADERS
+ #include <stdlib.h>
+ #else
+ #include <sys/types.h>
+ extern char *calloc(), *malloc(), *realloc();
+ extern void free();
+ #endif
+ 
+ #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
+ #include <string.h>
+ #undef index
+ #define index strchr
+ #undef bcopy
+ #define bcopy(s, d, n) memcpy(d, s, n)
+ #undef bzero
+ #define bzero(d, n) memset(d, 0, n)
+ #else
+ #include <strings.h>
+ #endif
+ 
+ #ifndef DEBUG	/* use the same approach as regex.c */
+ #undef assert
+ #define assert(e)
+ #endif /* DEBUG */
+ 
+ #ifndef isgraph
+ #define isgraph(C) (isprint(C) && !isspace(C))
+ #endif
+ 
+ #ifdef isascii
+ #define ISALPHA(C) (isascii(C) && isalpha(C))
+ #define ISUPPER(C) (isascii(C) && isupper(C))
+ #define ISLOWER(C) (isascii(C) && islower(C))
+ #define ISDIGIT(C) (isascii(C) && isdigit(C))
+ #define ISXDIGIT(C) (isascii(C) && isxdigit(C))
+ #define ISSPACE(C) (isascii(C) && isspace(C))
+ #define ISPUNCT(C) (isascii(C) && ispunct(C))
+ #define ISALNUM(C) (isascii(C) && isalnum(C))
+ #define ISPRINT(C) (isascii(C) && isprint(C))
+ #define ISGRAPH(C) (isascii(C) && isgraph(C))
+ #define ISCNTRL(C) (isascii(C) && iscntrl(C))
+ #else
+ #define ISALPHA(C) isalpha(C)
+ #define ISUPPER(C) isupper(C)
+ #define ISLOWER(C) islower(C)
+ #define ISDIGIT(C) isdigit(C)
+ #define ISXDIGIT(C) isxdigit(C)
+ #define ISSPACE(C) isspace(C)
+ #define ISPUNCT(C) ispunct(C)
+ #define ISALNUM(C) isalnum(C)
+ #define ISPRINT(C) isprint(C)
+ #define ISGRAPH(C) isgraph(C)
+ #define ISCNTRL(C) iscntrl(C)
+ #endif
+ 
+ #include "regex.h"
+ #include "dfa.h"
+ #include "mbc.h"
+ 
+ #ifdef __STDC__
+ typedef void *ptr_t;
+ #else
+ typedef char *ptr_t;
+ #endif
+ 
+ static void dfamust _RE_ARGS((struct dfa *dfa));
+ 
+ static ptr_t xcalloc _RE_ARGS((size_t n, size_t s));
+ static ptr_t xmalloc _RE_ARGS((size_t n));
+ static ptr_t xrealloc _RE_ARGS((ptr_t p, size_t n));
+ #ifdef DEBUG
+ static void prtok _RE_ARGS((token t));
+ #endif
+ static int tstbit _RE_ARGS((int b, charclass c));
+ static void setbit _RE_ARGS((int b, charclass c));
+ static void clrbit _RE_ARGS((int b, charclass c));
+ static void copyset _RE_ARGS((charclass src, charclass dst));
+ static void zeroset _RE_ARGS((charclass s));
+ static void notset _RE_ARGS((charclass s));
+ static int equal _RE_ARGS((charclass s1, charclass s2));
+ static int charclass_index _RE_ARGS((charclass s));
+ static int looking_at _RE_ARGS((const char *s));
+ static token lex _RE_ARGS((void));
+ static void addtok _RE_ARGS((token t));
+ static void atom _RE_ARGS((void));
+ static int nsubtoks _RE_ARGS((int tindex));
+ static void copytoks _RE_ARGS((int tindex, int ntokens));
+ static void closure _RE_ARGS((void));
+ static void branch _RE_ARGS((void));
+ static void regexp _RE_ARGS((int toplevel));
+ static void copy _RE_ARGS((position_set *src, position_set *dst));
+ static void insert _RE_ARGS((position p, position_set *s));
+ static void merge _RE_ARGS((position_set *s1, position_set *s2, position_set *m));
+ static void delete _RE_ARGS((position p, position_set *s));
+ static int state_index _RE_ARGS((struct dfa *d, position_set *s,
+ 			  int newline, int letter));
+ static void build_state _RE_ARGS((int s, struct dfa *d));
+ static void build_state_zero _RE_ARGS((struct dfa *d));
+ static char *icatalloc _RE_ARGS((char *old, char *new));
+ static char *icpyalloc _RE_ARGS((char *string));
+ static char *istrstr _RE_ARGS((char *lookin, char *lookfor));
+ static void ifree _RE_ARGS((char *cp));
+ static void freelist _RE_ARGS((char **cpp));
+ static char **enlist _RE_ARGS((char **cpp, char *new, size_t len));
+ static char **comsubs _RE_ARGS((char *left, char *right));
+ static char **addlists _RE_ARGS((char **old, char **new));
+ static char **inboth _RE_ARGS((char **left, char **right));
+ 
+ static ptr_t	/* calloc() wrapper that never returns NULL: it aborts on failure. */
+ xcalloc(n, s)
+      size_t n;
+      size_t s;
+ {
+   ptr_t r = calloc(n, s);	/* zero-filled room for n elements of s bytes */
+ 
+   if (!r)	/* callers never test the result, so fail hard instead of returning NULL */
+     { fprintf(stderr,"Memory exhausted\n"); abort(); }
+   return r;
+ }
+ 
+ static ptr_t	/* malloc() wrapper that never returns NULL: it aborts on failure. */
+ xmalloc(n)
+      size_t n;
+ {
+   ptr_t r = malloc(n);
+ 
+   assert(n != 0);	/* zero-sized requests are a caller bug (checked in DEBUG builds only) */
+   if (!r)	/* callers never test the result, so fail hard instead of returning NULL */
+     { fprintf(stderr,"Memory exhausted\n"); abort(); }
+   return r;
+ }
+ 
+ static ptr_t	/* realloc() wrapper that never returns NULL: it aborts on failure. */
+ xrealloc(p, n)
+      ptr_t p;
+      size_t n;
+ {
+   ptr_t r = realloc(p, n);
+ 
+   assert(n != 0);	/* zero-sized requests are a caller bug (checked in DEBUG builds only) */
+   if (!r)	/* the REALLOC macro overwrites p with the result, so NULL must not escape */
+     { fprintf(stderr,"Memory exhausted\n"); abort(); }
+   return r;
+ }
+ 
+ #define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
+ #define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
+ #define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
+ 
+ /* Reallocate array p (elements of type t) if nalloc is too small for index: nalloc is doubled until it exceeds index, so it must be nonzero.  Expands to a bare `if' statement. */
+ #define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
+   if ((index) >= (nalloc))			  \
+     {						  \
+       while ((index) >= (nalloc))		  \
+ 	(nalloc) *= 2;				  \
+       REALLOC(p, t, nalloc);			  \
+     }
+ 
+ #ifdef DEBUG
+ 
+ static void	/* Debug helper: print a readable form of token t on stderr. */
+ prtok(t)
+      token t;
+ {
+   const char *name;
+ 
+   if (t < 0)	/* negative tokens are end markers */
+     { fprintf(stderr, "END"); return; }
+   if (t < NOTCHAR)	/* plain byte: show bytes with the high bit set in hex */
+     {
+       if (t & 0x80)
+         fprintf(stderr, "0x%02x", (unsigned char)t);
+       else
+         fprintf(stderr, "%c", t);
+       return;
+     }
+   switch (t)	/* operator tokens; anything unlisted is reported as a set */
+     {
+     case EMPTY: name = "EMPTY"; break;
+     case BACKREF: name = "BACKREF"; break;
+     case BEGLINE: name = "BEGLINE"; break;
+     case ENDLINE: name = "ENDLINE"; break;
+     case BEGWORD: name = "BEGWORD"; break;
+     case ENDWORD: name = "ENDWORD"; break;
+     case LIMWORD: name = "LIMWORD"; break;
+     case NOTLIMWORD: name = "NOTLIMWORD"; break;
+     case QMARK: name = "QMARK"; break;
+     case STAR: name = "STAR"; break;
+     case PLUS: name = "PLUS"; break;
+     case CAT: name = "CAT"; break;
+     case OR: name = "OR"; break;
+     case ORTOP: name = "ORTOP"; break;
+     case LPAREN: name = "LPAREN"; break;
+     case RPAREN: name = "RPAREN"; break;
+     default: name = "CSET"; break;
+     }
+   fprintf(stderr, "%s", name);
+ }
+ #endif /* DEBUG */
+ 
+ /* Stuff pertaining to charclasses. */
+ 
+ static int	/* Return 1 if bit b is set in charclass c, else 0. */
+ tstbit(b, c)
+      int b;
+      charclass c;
+ {
+   return (c[b / INTBITS] & (1U << b % INTBITS)) != 0;	/* 1U: shifting a signed 1 into the top bit is undefined */
+ }
+ 
+ static void	/* Set bit b in charclass c. */
+ setbit(b, c)
+      int b;
+      charclass c;
+ {
+   c[b / INTBITS] |= 1U << b % INTBITS;	/* 1U: shifting a signed 1 into the top bit is undefined */
+ }
+ 
+ static void	/* Clear bit b in charclass c. */
+ clrbit(b, c)
+      int b;
+      charclass c;
+ {
+   c[b / INTBITS] &= ~(1U << b % INTBITS);	/* 1U: shifting a signed 1 into the top bit is undefined */
+ }
+ 
+ static void	/* Copy every word of charclass src into dst. */
+ copyset(src, dst)
+      charclass src;
+      charclass dst;
+ {
+   int k = 0;
+ 
+   while (k < CHARCLASS_INTS)
+     { dst[k] = src[k]; ++k; }
+ }
+ 
+ static void	/* Make charclass s the empty set. */
+ zeroset(s)
+      charclass s;
+ {
+   int k = CHARCLASS_INTS;
+ 
+   while (--k >= 0)
+     s[k] = 0;
+ }
+ 
+ static void	/* Complement charclass s in place. */
+ notset(s)
+      charclass s;
+ {
+   int k = CHARCLASS_INTS;
+ 
+   while (--k >= 0)
+     s[k] = ~s[k];
+ }
+ 
+ static int	/* Return 1 if charclasses s1 and s2 hold the same bits, else 0. */
+ equal(s1, s2)
+      charclass s1;
+      charclass s2;
+ {
+   int k = 0;
+ 
+   /* Advance past the common prefix; reaching the end means no word differs. */
+   while (k < CHARCLASS_INTS && s1[k] == s2[k])
+     ++k;
+   return k == CHARCLASS_INTS;
+ }
+ 
+ static int	/* Return 1 if no bit is set in charclass s, else 0. */
+ isemptyset(s)
+      charclass s;
+ {
+   int k = 0;
+ 
+   /* Skip the all-zero words; reaching the end means the set is empty. */
+   while (k < CHARCLASS_INTS && !s[k])
+     k++;
+   return k == CHARCLASS_INTS;
+ }
+ 
+ /* A pointer to the current dfa is kept here during parsing. */
+ static struct dfa *dfa;
+ 
+ /* Return the slot of charclass s in dfa->charclasses, appending a copy of s first if no equal set is stored yet. */
+ static int
+ charclass_index(s)
+      charclass s;
+ {
+   int slot = 0;
+ 
+   while (slot < dfa->cindex && !equal(s, dfa->charclasses[slot]))
+     ++slot;
+   if (slot < dfa->cindex)
+     return slot;
+   REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex);
+   copyset(s, dfa->charclasses[dfa->cindex++]);
+   return slot;
+ }
+ 
+ /* Syntax bits controlling the behavior of the lexical analyzer. */
+ static reg_syntax_t syntax_bits, syntax_bits_set;
+ 
+ /* Flag for case-folding letters into sets. */
+ static int case_fold;
+ 
+ /* Public entry point: remember the regex syntax BITS and the case-folding flag FOLD, and note that a syntax has been chosen. */
+ void
+ dfasyntax(bits, fold)
+      reg_syntax_t bits;
+      int fold;
+ {
+   case_fold = fold;
+   syntax_bits = bits;
+   syntax_bits_set = 1;	/* dfaparse() complains when this is still 0 */
+ }
+ 
+ /* Lexical analyzer.  All the dross that deals with the obnoxious
+    GNU Regex syntax bits is located here.  The poor, suffering
+    reader is referred to the GNU Regex documentation for the
+    meaning of the @#%!@#%^!@ syntax bits. */
+ 
+ static char *lexstart;		/* Pointer to beginning of input string. */
+ static char *lexptr;		/* Pointer to next input character. */
+ static int lexleft;		/* Number of characters remaining (explicit int: implicit int is gone since C99). */
+ static token lasttok;		/* Previous token returned; initially END. */
+ static int laststart;		/* True if we're separated from beginning or (, |
+ 				   only by zero-width characters. */
+ static int parens;		/* Count of outstanding left parens. */
+ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
+ 
+ static charclass cs_cset[8];
+ static unsigned char cs_ready[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+ 
+ static enum {	/* Multi-byte side information that lex() hands to atom(). */
+   MBEXTTOK_NONE = -1,			/* nothing extra; values 0..255 are a literal's 2nd byte */
+   MBEXTTOK_NOTCHAR = 256,		/* first value that is not a byte */
+   MBEXTTOK_ORMBC = MBEXTTOK_NOTCHAR,	/* also accept a 2-byte char (sets 1 and 2 of setcodeset) */
+   MBEXTTOK_ORMBC_NL,			/* same, using sets 5 and 6 of setcodeset */
+   MBEXTTOK_CLASS,			/* bracket expression with multi-byte members */
+   MBEXTTOK_INVCLASS			/* inverted bracket expression; no trailing comma, for C89 */
+ } mbexttok = MBEXTTOK_NONE;
+ 
+ static charclass mbcset_set;
+ static charclass mbcset_all;
+ static charclass mbcset[128];	/* 128*256/8 = 4 Kbytes */
+ 
+ /* Return, as a token, one of the character sets that are (presumably) used often.
+    n = 0 ... the set of all single-byte characters.
+        1 ... the set of all first bytes of 2-byte characters.
+        2 ... the set of all second bytes of 2-byte characters.
+       +4 ... the same sets, but '\0' and '\n' are not removed. */
+ static token
+ setcodeset(n)
+      int n;
+ {
+   token c;
+ 
+   if (!cs_ready[n]) {
+     zeroset(cs_cset[n]);
+     switch (n) {
+     case 0:
+     case 4:
+       /* All single-byte characters: mark the lead bytes, then invert. */
+       for (c = 0; c < NOTCHAR; c++)
+ 	if (ismbchar(c))
+ 	  setbit(c, cs_cset[n]);
+       notset(cs_cset[n]);
+       break;
+     case 1:
+     case 5:
+       /* All first bytes of 2-byte characters. */
+       for (c = 0; c < NOTCHAR; c++)
+ 	if (ismbchar(c))
+ 	  setbit(c, cs_cset[n]);
+       break;
+     case 2:
+     case 6:
+       /* All second bytes of 2-byte characters (starts as every byte value). */
+       notset(cs_cset[n]);
+       break;
+     }
+     if (!(n & 4)) {
+       if (syntax_bits & RE_DOT_NOT_NULL || n != 0)
+ 	clrbit('\0', cs_cset[n]);
+       if (!(syntax_bits & RE_DOT_NEWLINE) || n != 0)
+ 	clrbit('\n', cs_cset[n]);
+     }
+     cs_ready[n] = 1;
+   }
+   return CSET + charclass_index(cs_cset[n]);
+ }
+ 
+ /* Fetch the next pattern byte into c (characters become unsigned here).  At end of input, print eoferr if it is nonzero and make the calling function return END instead of reading past the buffer. */
+ #define FETCH(c, eoferr)   	      \
+   {			   	      \
+     if (lexleft <= 0)	   	      \
+       {		   	      \
+ 	if (eoferr != 0) fputs(eoferr, stderr); \
+ 	return lasttok = END;	      \
+       }		   	      \
+     (c) = (unsigned char) *lexptr++;  \
+     --lexleft;		   	      \
+   }
+ 
+ #ifdef __STDC__
+ #define FUNC(F, P) static int F(int c) { return P(c); }
+ #else
+ #define FUNC(F, P) static int F(c) int c; { return P(c); }
+ #endif
+ 
+ FUNC(is_alpha, ISALPHA)
+ FUNC(is_upper, ISUPPER)
+ FUNC(is_lower, ISLOWER)
+ FUNC(is_digit, ISDIGIT)
+ FUNC(is_xdigit, ISXDIGIT)
+ FUNC(is_space, ISSPACE)
+ FUNC(is_punct, ISPUNCT)
+ FUNC(is_alnum, ISALNUM)
+ FUNC(is_print, ISPRINT)
+ FUNC(is_graph, ISGRAPH)
+ FUNC(is_cntrl, ISCNTRL)
+ 
+ /* The following list maps the names of the Posix named character classes
+    to predicate functions that determine whether a given character is in
+    the class.  The leading [ has already been eaten by the lexical analyzer. */
+ static struct {
+   const char *name;
+   int (*pred) _RE_ARGS((int));
+ } prednames[] = {
+   { ":alpha:]", is_alpha },
+   { ":upper:]", is_upper },
+   { ":lower:]", is_lower },
+   { ":digit:]", is_digit },
+   { ":xdigit:]", is_xdigit },
+   { ":space:]", is_space },
+   { ":punct:]", is_punct },
+   { ":alnum:]", is_alnum },
+   { ":print:]", is_print },
+   { ":graph:]", is_graph },
+   { ":cntrl:]", is_cntrl },
+   { 0 }
+ };
+ 
+ static int	/* Return 1 if the unread input begins with the string s, else 0. */
+ looking_at(s)
+      const char *s;
+ {
+   size_t len;
+ 
+   len = strlen(s);
+   if (lexleft < 0 || (size_t) lexleft < len)	/* a negative count must not turn into a huge unsigned one */
+     return 0;
+   return strncmp(s, lexptr, len) == 0;
+ }
+ 
+ static token	/* Return the next token of the pattern; multi-byte extras are left in mbexttok. */
+ lex()
+ {
+   token c, c1, c2;
+   int backslash = 0, invert;
+   charclass ccl;
+   int i;
+ 
+   /* Basic plan: We fetch a character.  If it's a backslash,
+      we set the backslash flag and go through the loop again.
+      On the plus side, this avoids having a duplicate of the
+      main switch inside the backslash case.  On the minus side,
+      it means that just about every case begins with
+      "if (backslash) ...".  */
+   mbexttok = MBEXTTOK_NONE;
+   for (i = 0; i < 2; ++i)
+     {
+       FETCH(c, 0);
+       switch (c)
+ 	{
+ 	case '\\':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  if (lexleft == 0)
+ 	    fprintf(stderr,"Unfinished \\ escape");
+ 	  backslash = 1;
+ 	  break;
+ 
+ 	case '^':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ 	      || lasttok == END
+ 	      || lasttok == LPAREN
+ 	      || lasttok == OR)
+ 	    return lasttok = BEGLINE;
+ 	  goto normal_char;
+ 
+ 	case '$':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ 	      || lexleft == 0
+ 	      || (syntax_bits & RE_NO_BK_PARENS
+ 		  ? lexleft > 0 && *lexptr == ')'
+ 		  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
+ 	      || (syntax_bits & RE_NO_BK_VBAR
+ 		  ? lexleft > 0 && *lexptr == '|'
+ 		  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
+ 	      || ((syntax_bits & RE_NEWLINE_ALT)
+ 	          && lexleft > 0 && *lexptr == '\n'))
+ 	    return lasttok = ENDLINE;
+ 	  goto normal_char;
+ 
+ 	case '1':
+ 	case '2':
+ 	case '3':
+ 	case '4':
+ 	case '5':
+ 	case '6':
+ 	case '7':
+ 	case '8':
+ 	case '9':
+ 	  if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ 	    {
+ 	      laststart = 0;
+ 	      return lasttok = BACKREF;
+ 	    }
+ 	  goto normal_char;
+ 
+ 	case '<':
+ 	  if (syntax_bits & RE_NO_GNU_OPS)
+ 	    goto normal_char;
+ 	  if (backslash)
+ 	    return lasttok = BEGWORD;
+ 	  goto normal_char;
+ 
+ 	case '>':
+ 	  if (syntax_bits & RE_NO_GNU_OPS)
+ 	    goto normal_char;
+ 	  if (backslash)
+ 	    return lasttok = ENDWORD;
+ 	  goto normal_char;
+ 
+ 	case 'b':
+ 	  if (syntax_bits & RE_NO_GNU_OPS)
+ 	    goto normal_char;
+ 	  if (backslash)
+ 	    return lasttok = LIMWORD;
+ 	  goto normal_char;
+ 
+ 	case 'B':
+ 	  if (syntax_bits & RE_NO_GNU_OPS)
+ 	    goto normal_char;
+ 	  if (backslash)
+ 	    return lasttok = NOTLIMWORD;
+ 	  goto normal_char;
+ 
+ 	case '?':
+ 	  if (syntax_bits & RE_LIMITED_OPS)
+ 	    goto normal_char;
+ 	  if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ 	    goto normal_char;
+ 	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ 	    goto normal_char;
+ 	  return lasttok = QMARK;
+ 
+ 	case '*':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ 	    goto normal_char;
+ 	  return lasttok = STAR;
+ 
+ 	case '+':
+ 	  if (syntax_bits & RE_LIMITED_OPS)
+ 	    goto normal_char;
+ 	  if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ 	    goto normal_char;
+ 	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ 	    goto normal_char;
+ 	  return lasttok = PLUS;
+ 
+ 	case '{':
+ 	  if (!(syntax_bits & RE_INTERVALS))
+ 	    goto normal_char;
+ 	  if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ 	    goto normal_char;
+ 	  minrep = maxrep = 0;
+ 	  /* Cases:
+ 	     {M} - exact count
+ 	     {M,} - minimum count, maximum is infinity
+ 	     {,M} - 0 through M
+ 	     {M,N} - M through N */
+ 	  FETCH(c, "unfinished repeat count");
+ 	  if (ISDIGIT(c))
+ 	    {
+ 	      minrep = c - '0';
+ 	      for (;;)
+ 		{
+ 		  FETCH(c, "unfinished repeat count");
+ 		  if (!ISDIGIT(c))
+ 		    break;
+ 		  minrep = 10 * minrep + c - '0';
+ 		}
+ 	    }
+ 	  else if (c != ',')
+ 	    fprintf(stderr,"malformed repeat count");
+ 	  if (c == ',')
+ 	    for (;;)
+ 	      {
+ 		FETCH(c, "unfinished repeat count");
+ 		if (!ISDIGIT(c))
+ 		  break;
+ 		maxrep = 10 * maxrep + c - '0';
+ 	      }
+ 	  else
+ 	    maxrep = minrep;
+ 	  if (!(syntax_bits & RE_NO_BK_BRACES))
+ 	    {
+ 	      if (c != '\\')
+ 		fprintf(stderr,"malformed repeat count");
+ 	      FETCH(c, "unfinished repeat count");
+ 	    }
+ 	  if (c != '}')
+ 	    fprintf(stderr,"malformed repeat count");
+ 	  laststart = 0;
+ 	  return lasttok = REPMN;
+ 
+ 	case '|':
+ 	  if (syntax_bits & RE_LIMITED_OPS)
+ 	    goto normal_char;
+ 	  if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ 	    goto normal_char;
+ 	  laststart = 1;
+ 	  return lasttok = OR;
+ 
+ 	case '\n':
+ 	  if (syntax_bits & RE_LIMITED_OPS
+ 	      || backslash
+ 	      || !(syntax_bits & RE_NEWLINE_ALT))
+ 	    goto normal_char;
+ 	  laststart = 1;
+ 	  return lasttok = OR;
+ 
+ 	case '(':
+ 	  if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ 	    goto normal_char;
+ 	  ++parens;
+ 	  laststart = 1;
+ 	  return lasttok = LPAREN;
+ 
+ 	case ')':
+ 	  if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ 	    goto normal_char;
+ 	  if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ 	    goto normal_char;
+ 	  --parens;
+ 	  laststart = 0;
+ 	  return lasttok = RPAREN;
+ 
+ 	case '.':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  if (current_mbctype != MBCTYPE_ASCII)
+ 	    mbexttok = MBEXTTOK_ORMBC;
+ 	  laststart = 0;
+ 	  return lasttok = setcodeset(0);
+ 
+ 	case 'w':
+ 	  if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ 	    goto normal_char;
+ 	  zeroset(ccl);
+ 	  for (c2 = 0; c2 < NOTCHAR; ++c2)
+ 	    if (ISALNUM(c2))
+ 	      setbit(c2, ccl);
+ 	  laststart = 0;
+ 	  return lasttok = CSET + charclass_index(ccl);
+ 
+ 	case 'W':
+ 	  if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ 	    goto normal_char;
+ 	  zeroset(ccl);
+ 	  for (c2 = 0; c2 < NOTCHAR; ++c2)
+ 	    if (!ISALNUM(c2) && !ismbchar(c2))
+ 	      setbit(c2, ccl);
+ 	  mbexttok = MBEXTTOK_ORMBC_NL;
+ 	  laststart = 0;
+ 	  return lasttok = CSET + charclass_index(ccl);
+ 
+ 	case '[':
+ 	  if (backslash)
+ 	    goto normal_char;
+ 	  zeroset(ccl);
+ 	  FETCH(c, "Unbalanced [");
+ 	  if (c == '^')
+ 	    {
+ 	      FETCH(c, "Unbalanced [");
+ 	      invert = 1;
+ 	    }
+ 	  else
+ 	    invert = 0;
+ 	  do
+ 	    {
+ 	      unsigned char ch = 0, c2h = 0;
+ 
+ 	      /* Nobody ever said this had to be fast. :-)
+ 		 Note that if we're looking at some other [:...:]
+ 		 construct, we just treat it as a bunch of ordinary
+ 		 characters.  We can do this because we assume
+ 		 regex has checked for syntax errors before
+ 		 dfa is ever called. */
+ 	      if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
+ 		for (c1 = 0; prednames[c1].name; ++c1)
+ 		  if (looking_at(prednames[c1].name))
+ 		    {
+ 		      for (c2 = 0; c2 < NOTCHAR; ++c2)
+ 			if ((*prednames[c1].pred)(c2))
+ 			  setbit(c2, ccl);
+ 		      lexptr += strlen(prednames[c1].name);
+ 		      lexleft -= strlen(prednames[c1].name);
+ 		      FETCH(c1, "Unbalanced [");
+ 		      goto skip;
+ 		    }
+ 	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ 		FETCH(c, "Unbalanced [");
+ 	      if (ismbchar(c)) {
+ 		ch = (unsigned char)c;
+ 		FETCH(c, "Multi-byte char incomplete");
+ 	      }
+ 	      FETCH(c1, "Unbalanced [");
+ 	      if (c1 == '-')
+ 		{
+ 		  FETCH(c2, "Unbalanced [");
+ 		  if (c2 == ']')
+ 		    {
+ 		      /* In the case [x-], the - is an ordinary hyphen,
+ 			 which is left in c1, the lookahead character. */
+ 		      --lexptr;
+ 		      ++lexleft;
+ 		      c2 = c;
+ 		    }
+ 		  else
+ 		    {
+ 		      if (c2 == '\\'
+ 			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ 			FETCH(c2, "Unbalanced [");
+ 		      if (ismbchar(c2)) {
+ 			c2h = (unsigned char)c2;
+ 			FETCH(c2, "Multi-byte char incomplete");
+ 		      }
+ 		      FETCH(c1, "Unbalanced [");
+ 		    }
+ 		}
+ 	      else {
+ 		c2h = ch;
+ 		c2 = c;
+ 	      }
+ 	      if (ch < c2h || (ch == c2h && c <= c2)) {
+ 		if (ch == 0) {
+ 		  ch = (unsigned char)c2;
+ 		  if (c2h > 0)
+ 		    ch = NOTCHAR - 1;
+ 		  for (; c <= ch; c++) {	/* compare as ints: (unsigned char)c can never exceed ch == 0xff, which made this loop endless */
+ 		    setbit(c, ccl);
+ 		    if (case_fold) {
+ 		      if (ISUPPER(c))
+ 			setbit(tolower(c), ccl);
+ 		      else if (ISLOWER(c))
+ 			setbit(toupper(c), ccl);
+ 		    }
+ 		  }
+ 		  ch = 0x80;
+ 		  c = 0x00;
+ 		}
+ 		if (ch <= c2h) {
+ 		  if (mbexttok < 0) {
+ 		    mbexttok = MBEXTTOK_CLASS;
+ 		    zeroset(mbcset_set);
+ 		    zeroset(mbcset_all);
+ 		  }
+ 		  if (ch < c2h && c != 0x00) {	/* leading partial row */
+ 		    int t;
+ 
+ 		    if (ismbchar(ch)
+ 			&& ((t = tstbit(ch, mbcset_set))
+ 			    || !tstbit(ch, mbcset_all))) {
+ 		      if (!t) {
+ 			setbit(ch, mbcset_set);
+ 			zeroset(mbcset[ch & 0177]);
+ 		      }
+ 		      for (; c < NOTCHAR; c++)
+ 			setbit(c, mbcset[ch & 0177]);
+ 		    }
+ 		    ch++;
+ 		    c = 0x00;
+ 		  }
+ 		  if (ch < c2h || (ch == c2h && c == 0x00 && c2 == 0xff)) {
+ 		    if (c == 0x00 && c2 == 0xff)
+ 		      c2h++;
+ 		    for (; ch < c2h; ch++)
+ 		      if (ismbchar(ch)) {
+ 			clrbit(ch, mbcset_set);
+ 			setbit(ch, mbcset_all);
+ 		      }
+ 		    if (c == 0x00 && c2 == 0xff)
+ 		      c2h--;
+ 		    c = 0x00;
+ 		  }
+ 		  if (ch <= c2h) {
+ 		    int t;
+ 
+ 		    /* c <= c2 always holds at this point. */
+ 		    if (ismbchar(ch)
+ 			&& ((t = tstbit(ch, mbcset_set))
+ 			    || !tstbit(ch, mbcset_all))) {
+ 		      if (!t) {
+ 			setbit(ch, mbcset_set);
+ 			zeroset(mbcset[ch & 0177]);
+ 		      }
+ 		      for (; c <= c2; c++)
+ 			setbit(c, mbcset[ch & 0177]);
+ 		    }
+ 		  }
+ 		}
+ 	      }
+ 	    skip:
+ 	      ;
+ 	    }
+ 	  while ((c = c1) != ']');
+ 	  if (invert)
+ 	    {
+ 	      notset(ccl);
+ 	      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ 		clrbit('\n', ccl);
+ 	      if (mbexttok == MBEXTTOK_CLASS) {
+ 		mbexttok = MBEXTTOK_INVCLASS;
+ 		if (!isemptyset(mbcset_set)) {
+ 		  for (c = 0x80; c <= 0xff; c++)
+ 		    if (tstbit(c, mbcset_set))
+ 		      notset(mbcset[c & 0177]);
+ 		}
+ 		notset(mbcset_all);
+ 	      }
+ 	      else
+ 		mbexttok = MBEXTTOK_ORMBC_NL;
+ 	    }
+ 	  if (current_mbctype != MBCTYPE_ASCII)
+ 	    for (c = 0x80; c <= 0xff; c++)
+ 	      if (ismbchar(c))
+ 		clrbit(c, ccl);
+ 	  laststart = 0;
+ 	  return lasttok = CSET + charclass_index(ccl);
+ 
+ 	default:
+ 	normal_char:
+ 	  laststart = 0;
+ 	  if (ismbchar(c)) {
+ 	    FETCH(mbexttok, "Multi-byte char incomplete");
+ 	    return c;
+ 	  }
+ 	  if (case_fold && ISALPHA(c))
+ 	    {
+ 	      zeroset(ccl);
+ 	      setbit(c, ccl);
+ 	      if (isupper(c))
+ 		setbit(tolower(c), ccl);
+ 	      else
+ 		setbit(toupper(c), ccl);
+ 	      return lasttok = CSET + charclass_index(ccl);
+ 	    }
+ 	  return c;
+ 	}
+     }
+ 
+   /* The above loop should consume at most a backslash
+      and some other character. */
+   abort();
+ }
+ 
+ /* Recursive descent parser for regular expressions. */
+ 
+ static token tok;		/* Lookahead token. */
+ static int depth;		/* Current depth of a hypothetical stack
+ 				   holding deferred productions.  This is
+ 				   used to determine the depth that will be
+ 				   required of the real stack later on in
+ 				   dfaanalyze(). */
+ 
+ /* Add the given token to the parse tree, maintaining the depth count and
+    updating the maximum depth if necessary. */
+ static void
+ addtok(t)
+      token t;
+ {
+   REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
+   dfa->tokens[dfa->tindex++] = t;
+ 
+   switch (t)
+     {
+     case QMARK:
+     case STAR:
+     case PLUS:
+       break;			/* unary operators leave the depth unchanged */
+ 
+     case CAT:
+     case OR:
+     case ORTOP:
+       --depth;			/* binary operators lower the depth by one */
+       break;
+ 
+     default:
+       ++dfa->nleaves;		/* every other token except EMPTY is counted as a leaf */
+     case EMPTY:			/* reached by fallthrough from default as well */
+       ++depth;
+       break;
+     }
+   if (depth > dfa->depth)
+     dfa->depth = depth;
+ }
+ 
+ /* The grammar understood by the parser is as follows.
+ 
+    regexp:
+      regexp OR branch
+      branch
+ 
+    branch:
+      branch closure
+      closure
+ 
+    closure:
+      closure QMARK
+      closure STAR
+      closure PLUS
+      atom
+ 
+    atom:
+      <normal character>
+      CSET
+      BACKREF
+      BEGLINE
+      ENDLINE
+      BEGWORD
+      ENDWORD
+      LIMWORD
+      NOTLIMWORD
+      <empty>
+ 
+    The parser builds a parse tree in postfix form in an array of tokens. */
+ 
+ static void	/* Parse one atom of the grammar and append its tokens in postfix order. */
+ atom()
+ {
+   if (mbexttok >= 0) {	/* lex() left multi-byte information next to tok */
+     if (mbexttok < MBEXTTOK_NOTCHAR) {	/* literal 2-byte char: tok is byte 1, mbexttok is byte 2 */
+       addtok(tok);
+       addtok(mbexttok);
+       addtok(CAT);
+     }
+     else
+       switch (mbexttok) {
+       case MBEXTTOK_ORMBC:
+       case MBEXTTOK_ORMBC_NL:
+ 	addtok(tok);	/* emits: tok | (first-byte set . second-byte set) */
+ 	if (mbexttok == MBEXTTOK_ORMBC) {
+ 	  addtok(setcodeset(1));
+ 	  addtok(setcodeset(2));
+ 	}
+ 	else {
+ 	  addtok(setcodeset(5));
+ 	  addtok(setcodeset(6));
+ 	}
+ 	addtok(CAT);
+ 	addtok(OR);
+ 	break;
+       case MBEXTTOK_CLASS:
+       case MBEXTTOK_INVCLASS:
+ 	{
+ 	  token c;
+ 
+ 	  addtok(tok);	/* single-byte part of the bracket expression */
+ 	  if (!isemptyset(mbcset_set))
+ 	    for (c = 0x80; c <= 0xff; c++)	/* one alternative per first byte that has its own second-byte set */
+ 	      if (tstbit(c, mbcset_set)) {
+ 		/* Make sure all bits in mbcset_all valid. */
+ 		clrbit(c, mbcset_all);
+ 		addtok(c);
+ 		if (mbexttok == MBEXTTOK_CLASS) {
+ 		  clrbit('\n', mbcset[c & 0177]);
+ 		  clrbit('\0', mbcset[c & 0177]);
+ 		}
+ 		else {
+ 		  setbit('\n', mbcset[c & 0177]);
+ 		  setbit('\0', mbcset[c & 0177]);
+ 		}
+ 		addtok(CSET + charclass_index(mbcset[c & 0177]));
+ 		addtok(CAT);
+ 		addtok(OR);
+ 	      }
+ 	  if (!isemptyset(mbcset_all)) {	/* first bytes for which any second byte is accepted */
+ 	    addtok(CSET + charclass_index(mbcset_all));
+ 	    if (mbexttok == MBEXTTOK_CLASS)
+ 	      addtok(setcodeset(2));
+ 	    else
+ 	      addtok(setcodeset(6));
+ 	    addtok(CAT);
+ 	    addtok(OR);
+ 	  }
+ 	}
+ 	break;
+       default:
+ 	break;
+       }
+     tok = lex();
+   } else
+   if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
+       || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
+       || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+     {
+       addtok(tok);	/* plain leaf token */
+       tok = lex();
+     }
+   else if (tok == LPAREN)
+     {
+       tok = lex();
+       regexp(0);	/* parenthesised subexpression, parsed as non-toplevel */
+       if (tok != RPAREN)
+ 	fprintf(stderr,"Unbalanced (");
+       tok = lex();
+     }
+   else
+     addtok(EMPTY);	/* nothing that can start an atom: the empty atom */
+ }
+ 
+ /* Return how many tokens make up the postfix subexpression that ends just before index tindex. */
+ static int
+ nsubtoks(tindex)
+ int tindex;
+ {
+   token last = dfa->tokens[tindex - 1];
+   int right;
+ 
+   /* Unary operator: the operator itself plus its single operand. */
+   if (last == QMARK || last == STAR || last == PLUS)
+     return 1 + nsubtoks(tindex - 1);
+ 
+   /* Binary operator: the operator, its right operand, then its left one. */
+   if (last == CAT || last == OR || last == ORTOP)
+     {
+       right = nsubtoks(tindex - 1);
+       return 1 + right + nsubtoks(tindex - 1 - right);
+     }
+ 
+   /* Anything else stands for itself. */
+   return 1;
+ }
+ 
+ /* Append a copy of the ntokens tokens starting at index tindex to the end of the token array. */
+ static void
+ copytoks(tindex, ntokens)
+      int tindex, ntokens;
+ {
+   int stop = tindex + ntokens;
+ 
+   while (tindex < stop)
+     addtok(dfa->tokens[tindex++]);
+ }
+ 
+ static void	/* Parse an atom followed by any number of ?, *, + or {m,n} operators. */
+ closure()
+ {
+   int tindex, ntokens, i;
+ 
+   atom();
+   while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
+     if (tok == REPMN)	/* {m,n}: lex() left the bounds in minrep and maxrep */
+       {
+ 	ntokens = nsubtoks(dfa->tindex);	/* size of the operand just parsed */
+ 	tindex = dfa->tindex - ntokens;		/* where that operand starts */
+ 	if (maxrep == 0)	/* maxrep is still 0 when lex() read no upper bound */
+ 	  addtok(PLUS);
+ 	if (minrep == 0)
+ 	  addtok(QMARK);
+ 	for (i = 1; i < minrep; ++i)	/* further mandatory copies */
+ 	  {
+ 	    copytoks(tindex, ntokens);
+ 	    addtok(CAT);
+ 	  }
+ 	for (; i < maxrep; ++i)		/* further optional copies */
+ 	  {
+ 	    copytoks(tindex, ntokens);
+ 	    addtok(QMARK);
+ 	    addtok(CAT);
+ 	  }
+ 	tok = lex();
+       }
+     else
+       {
+ 	addtok(tok);	/* ?, * and + are emitted as they are */
+ 	tok = lex();
+       }
+ }
+ 
+ static void
+ branch()
+ {
+   /* Parse one closure, then keep parsing closures and joining each to
+      what came before with CAT, until the lookahead is RPAREN, OR or a
+      negative token. */
+ 
+   for (closure(); !(tok == RPAREN || tok == OR || tok < 0); addtok(CAT))
+     closure();
+ }
+ 
+ static void
+ regexp(toplevel)
+      int toplevel;
+ {
+   /* Alternatives are joined with ORTOP at top level and with OR below it. */
+   token join = toplevel ? ORTOP : OR;
+ 
+   branch();
+   while (tok == OR)
+     {
+       tok = lex();
+       branch();
+       addtok(join);
+     }
+ }
+ 
+ /* Main entry point for the parser.  S is the string to parse and LEN its
+    length, so S may contain NUL characters.  D is the struct dfa to parse
+    into.  Problems are reported on stderr and parsing carries on. */
+ void
+ dfaparse(s, len, d)
+      char *s;
+      size_t len;
+      struct dfa *d;
+ 
+ {
+   dfa = d;
+   lexstart = lexptr = s;
+   lexleft = len;
+   lasttok = END;
+   laststart = 1;
+   parens = 0;
+ 
+   /* Newline-terminate so the message does not run into later output. */
+   if (! syntax_bits_set)
+     fprintf(stderr,"No syntax specified\n");
+   tok = lex();
+   depth = d->depth;
+ 
+   regexp(1);
+ 
+   if (tok != END)
+     fprintf(stderr,"Unbalanced )\n");
+   /* Close this regexp with its own end marker, END - d->nregexps. */
+   addtok(END - d->nregexps);
+   addtok(CAT);
+ 
+   if (d->nregexps)
+     addtok(ORTOP);
+ 
+   ++d->nregexps;
+ }
+ 
+ /* Some primitives for operating on sets of positions. */
+ 
+ /* Copy SRC's elements and count into DST; DST->elems must have room. */
+ static void
+ copy(src, dst)
+      position_set *src;
+      position_set *dst;
+ {
+   int k = -1;
+ 
+   while (++k < src->nelem)
+     dst->elems[k] = src->elems[k];
+   dst->nelem = src->nelem;
+ }
+ 
+ /* Insert position P into set S.  S is kept sorted by descending index.
+    If S already holds a position with P's index, P's constraint is OR'd
+    into it instead of adding a second entry.  S->elems must have room
+    for one more element. */
+ static void
+ insert(p, s)
+      position p;
+      position_set *s;
+ {
+   int slot = 0;			/* Where P belongs in S->elems. */
+   int k;
+ 
+   /* Skip every element whose index is greater than P's. */
+   while (slot < s->nelem && p.index < s->elems[slot].index)
+     ++slot;
+ 
+   if (slot < s->nelem && p.index == s->elems[slot].index)
+     {
+       s->elems[slot].constraint |= p.constraint;
+       return;
+     }
+ 
+   /* Open a gap at SLOT by moving the tail up one place, then fill it. */
+   for (k = s->nelem; k > slot; --k)
+     s->elems[k] = s->elems[k - 1];
+   s->elems[slot] = p;
+   ++s->nelem;
+ }
+ 
+ /* Merge S1 and S2 into M, larger index first.  A position found in both
+    is stored once with the two constraints OR'd.  M->elems must have room. */
+ static void
+ merge(s1, s2, m)
+      position_set *s1;
+      position_set *s2;
+      position_set *m;
+ {
+   int a, b;			/* Cursors into S1 and S2. */
+ 
+   m->nelem = 0;
+   for (a = b = 0; a < s1->nelem && b < s2->nelem; )
+     if (s1->elems[a].index == s2->elems[b].index)
+       {
+ 	m->elems[m->nelem] = s1->elems[a++];
+ 	m->elems[m->nelem++].constraint |= s2->elems[b++].constraint;
+       }
+     else if (s1->elems[a].index > s2->elems[b].index)
+       m->elems[m->nelem++] = s1->elems[a++];
+     else
+       m->elems[m->nelem++] = s2->elems[b++];
+   for (; a < s1->nelem; ++a)
+     m->elems[m->nelem++] = s1->elems[a];
+   for (; b < s2->nelem; ++b)
+     m->elems[m->nelem++] = s2->elems[b];
+ }
+ 
+ /* Remove from S the first element whose index matches P's, if any. */
+ static void
+ delete(p, s)
+      position p;
+      position_set *s;
+ {
+   int at = 0;
+ 
+   while (at < s->nelem && p.index != s->elems[at].index)
+     ++at;
+   if (at >= s->nelem)
+     return;			/* Not present: nothing to do. */
+   for (--s->nelem; at < s->nelem; ++at)
+     s->elems[at] = s->elems[at + 1];
+ }
+ 
+ /* Find the index of the state corresponding to the given position set with
+    the given preceding context, or create a new state if there is no such
+    state.  Newline and letter tell whether we got here on a newline or
+    letter, respectively.  Returns the state's index in d->states. */
+ static int
+ state_index(d, s, newline, letter)
+      struct dfa *d;
+      position_set *s;
+      int newline;
+      int letter;
+ {
+   int hash = 0;			/* XOR of index + constraint over S. */
+   int constraint;
+   int i, j;
+ 
+   newline = newline ? 1 : 0;	/* Reduce both flags to exactly 0 or 1. */
+   letter = letter ? 1 : 0;
+ 
+   for (i = 0; i < s->nelem; ++i)
+     hash ^= s->elems[i].index + s->elems[i].constraint;
+ 
+   /* Try to find a state that exactly matches the proposed one. */
+   for (i = 0; i < d->sindex; ++i)
+     {
+       if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
+ 	  || newline != d->states[i].newline || letter != d->states[i].letter)
+ 	continue;
+       for (j = 0; j < s->nelem; ++j)
+ 	if (s->elems[j].constraint
+ 	    != d->states[i].elems.elems[j].constraint
+ 	    || s->elems[j].index != d->states[i].elems.elems[j].index)
+ 	  break;
+       if (j == s->nelem)
+ 	return i;
+     }
+ 
+   /* We'll have to create a new state.  Here i == d->sindex. */
+   REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
+   d->states[i].hash = hash;
+   MALLOC(d->states[i].elems.elems, position, s->nelem);
+   copy(s, &d->states[i].elems);
+   d->states[i].newline = newline;
+   d->states[i].letter = letter;
+   d->states[i].backref = 0;
+   d->states[i].constraint = 0;
+   d->states[i].first_end = 0;
+   for (j = 0; j < s->nelem; ++j)
+     if (d->tokens[s->elems[j].index] < 0)
+       {
+ 	constraint = s->elems[j].constraint;
+ 	if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
+ 	    || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
+ 	    || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
+ 	    || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
+ 	  d->states[i].constraint |= constraint;
+ 	if (! d->states[i].first_end)	/* Keep only the first one met. */
+ 	  d->states[i].first_end = d->tokens[s->elems[j].index];
+       }
+     else if (d->tokens[s->elems[j].index] == BACKREF)
+       {
+ 	d->states[i].constraint = NO_CONSTRAINT;
+ 	d->states[i].backref = 1;
+       }
+ 
+   ++d->sindex;
+ 
+   return i;
+ }
+ 
+ /* Find the epsilon closure of a set of positions.  If any position of the set
+    contains a symbol that matches the empty string in some context, replace
+    that position with the elements of its follow labeled with an appropriate
+    constraint.  Repeat exhaustively until no funny positions are left.
+    S->elems must be large enough to hold the result. */
+ static void epsclosure _RE_ARGS((position_set *s, struct dfa *d));
+ 
+ static void
+ epsclosure(s, d)
+      position_set *s;
+      struct dfa *d;
+ {
+   int i, j;
+   int *visited;			/* Per-token flag: already expanded. */
+   position p, old;
+ 
+   MALLOC(visited, int, d->tindex);
+   for (i = 0; i < d->tindex; ++i)
+     visited[i] = 0;
+ 
+   for (i = 0; i < s->nelem; ++i)
+     if (d->tokens[s->elems[i].index] >= NOTCHAR
+ 	&& d->tokens[s->elems[i].index] != BACKREF
+ 	&& d->tokens[s->elems[i].index] < CSET)
+       {
+ 	old = s->elems[i];
+ 	p.constraint = old.constraint;
+ 	delete(s->elems[i], s);
+ 	if (visited[old.index])
+ 	  {
+ 	    --i;		/* Slot i now holds the next element. */
+ 	    continue;
+ 	  }
+ 	visited[old.index] = 1;
+ 	switch (d->tokens[old.index])
+ 	  {
+ 	  case BEGLINE:
+ 	    p.constraint &= BEGLINE_CONSTRAINT;
+ 	    break;
+ 	  case ENDLINE:
+ 	    p.constraint &= ENDLINE_CONSTRAINT;
+ 	    break;
+ 	  case BEGWORD:
+ 	    p.constraint &= BEGWORD_CONSTRAINT;
+ 	    break;
+ 	  case ENDWORD:
+ 	    p.constraint &= ENDWORD_CONSTRAINT;
+ 	    break;
+ 	  case LIMWORD:
+ 	    p.constraint &= LIMWORD_CONSTRAINT;
+ 	    break;
+ 	  case NOTLIMWORD:
+ 	    p.constraint &= NOTLIMWORD_CONSTRAINT;
+ 	    break;
+ 	  default:
+ 	    break;
+ 	  }
+ 	for (j = 0; j < d->follows[old.index].nelem; ++j)
+ 	  {
+ 	    p.index = d->follows[old.index].elems[j].index;
+ 	    insert(p, s);
+ 	  }
+ 	/* Force rescan to start at the beginning (++i brings i to 0). */
+ 	i = -1;
+       }
+ 
+   free(visited);
+ }
+ 
+ /* Perform bottom-up analysis on the parse tree, computing various functions.
+    Note that at this point, we're pretending constructs like \< are real
+    characters rather than constraints on what can follow them.
+ 
+    Nullable:  A node is nullable if it is at the root of a regexp that can
+    match the empty string.
+    *  EMPTY leaves are nullable.
+    * No other leaf is nullable.
+    * A QMARK or STAR node is nullable.
+    * A PLUS node is nullable if its argument is nullable.
+    * A CAT node is nullable if both its arguments are nullable.
+    * An OR node is nullable if either argument is nullable.
+ 
+    Firstpos:  The firstpos of a node is the set of positions (nonempty leaves)
+    that could correspond to the first character of a string matching the
+    regexp rooted at the given node.
+    * EMPTY leaves have empty firstpos.
+    * The firstpos of a nonempty leaf is that leaf itself.
+    * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
+      argument.
+    * The firstpos of a CAT node is the firstpos of the left argument, union
+      the firstpos of the right if the left argument is nullable.
+    * The firstpos of an OR node is the union of firstpos of each argument.
+ 
+    Lastpos:  The lastpos of a node is the set of positions that could
+    correspond to the last character of a string matching the regexp at
+    the given node.
+    * EMPTY leaves have empty lastpos.
+    * The lastpos of a nonempty leaf is that leaf itself.
+    * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
+      argument.
+    * The lastpos of a CAT node is the lastpos of its right argument, union
+      the lastpos of the left if the right argument is nullable.
+    * The lastpos of an OR node is the union of the lastpos of each argument.
+ 
+    Follow:  The follow of a position is the set of positions that could
+    correspond to the character following a character matching the node in
+    a string matching the regexp.  At this point we consider special symbols
+    that match the empty string in some context to be just normal characters.
+    Later, if we find that a special symbol is in a follow set, we will
+    replace it with the elements of its follow, labeled with an appropriate
+    constraint.
+    * Every node in the firstpos of the argument of a STAR or PLUS node is in
+      the follow of every node in the lastpos.
+    * Every node in the firstpos of the second argument of a CAT node is in
+      the follow of every node in the lastpos of the first argument.
+ 
+    Because of the postfix representation of the parse tree, the depth-first
+    analysis is conveniently done by a linear scan with the aid of a stack.
+    Sets are stored as arrays of the elements, obeying a stack-like allocation
+    scheme; the number of elements in each set deeper in the stack can be
+    used to determine the address of a particular set's array. */
+ void
+ dfaanalyze(d, searchflag)
+      struct dfa *d;
+      int searchflag;
+ {
+   int *nullable;		/* Nullable stack. */
+   int *nfirstpos;		/* Element count stack for firstpos sets. */
+   position *firstpos;		/* Array where firstpos elements are stored. */
+   int *nlastpos;		/* Element count stack for lastpos sets. */
+   position *lastpos;		/* Array where lastpos elements are stored. */
+   int *nalloc;			/* Sizes of arrays allocated to follow sets. */
+   position_set tmp;		/* Temporary set for merging sets. */
+   position_set merged;		/* Result of merging sets. */
+   int wants_newline;		/* True if some position wants newline info. */
+   int *o_nullable;		/* The o_* pointers keep the addresses */
+   int *o_nfirst, *o_nlast;	/* returned by MALLOC so that the arrays */
+   position *o_firstpos, *o_lastpos;	/* can be freed at the end. */
+   int i, j;
+   position *pos;		/* Start of the lastpos set being scanned. */
+ 
+ #ifdef DEBUG
+   fprintf(stderr, "dfaanalyze:\n");
+   for (i = 0; i < d->tindex; ++i)
+     {
+       fprintf(stderr, " %d:", i);
+       prtok(d->tokens[i]);
+     }
+   putc('\n', stderr);
+ #endif
+ 
+   d->searchflag = searchflag;
+ 
+   MALLOC(nullable, int, d->depth);
+   o_nullable = nullable;
+   MALLOC(nfirstpos, int, d->depth);
+   o_nfirst = nfirstpos;
+   MALLOC(firstpos, position, d->nleaves);
+   o_firstpos = firstpos, firstpos += d->nleaves;	/* Filled downward. */
+   MALLOC(nlastpos, int, d->depth);
+   o_nlast = nlastpos;
+   MALLOC(lastpos, position, d->nleaves);
+   o_lastpos = lastpos, lastpos += d->nleaves;	/* Filled downward. */
+   MALLOC(nalloc, int, d->tindex);
+   for (i = 0; i < d->tindex; ++i)
+     nalloc[i] = 0;
+   MALLOC(merged.elems, position, d->nleaves);
+ 
+   CALLOC(d->follows, position_set, d->tindex);
+ 
+   for (i = 0; i < d->tindex; ++i)
+ #ifdef DEBUG
+     {				/* Nonsyntactic #ifdef goo... */
+ #endif
+     switch (d->tokens[i])
+       {
+       case EMPTY:
+ 	/* The empty set is nullable. */
+ 	*nullable++ = 1;
+ 
+ 	/* The firstpos and lastpos of the empty leaf are both empty. */
+ 	*nfirstpos++ = *nlastpos++ = 0;
+ 	break;
+ 
+       case STAR:
+       case PLUS:
+ 	/* Every element in the firstpos of the argument is in the follow
+ 	   of every element in the lastpos. */
+ 	tmp.nelem = nfirstpos[-1];
+ 	tmp.elems = firstpos;
+ 	pos = lastpos;
+ 	for (j = 0; j < nlastpos[-1]; ++j)
+ 	  {
+ 	    merge(&tmp, &d->follows[pos[j].index], &merged);
+ 	    REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
+ 				 nalloc[pos[j].index], merged.nelem - 1);
+ 	    copy(&merged, &d->follows[pos[j].index]);
+ 	  }
+ 	/* Fall through: a STAR node is nullable too. */
+       case QMARK:
+ 	/* A QMARK or STAR node is automatically nullable. */
+ 	if (d->tokens[i] != PLUS)
+ 	  nullable[-1] = 1;
+ 	break;
+ 
+       case CAT:
+ 	/* Every element in the firstpos of the second argument is in the
+ 	   follow of every element in the lastpos of the first argument. */
+ 	tmp.nelem = nfirstpos[-1];
+ 	tmp.elems = firstpos;
+ 	pos = lastpos + nlastpos[-1];
+ 	for (j = 0; j < nlastpos[-2]; ++j)
+ 	  {
+ 	    merge(&tmp, &d->follows[pos[j].index], &merged);
+ 	    REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
+ 				 nalloc[pos[j].index], merged.nelem - 1);
+ 	    copy(&merged, &d->follows[pos[j].index]);
+ 	  }
+ 
+ 	/* The firstpos of a CAT node is the firstpos of the first argument,
+ 	   union that of the second argument if the first is nullable. */
+ 	if (nullable[-2])
+ 	  nfirstpos[-2] += nfirstpos[-1];
+ 	else
+ 	  firstpos += nfirstpos[-1];
+ 	--nfirstpos;
+ 
+ 	/* The lastpos of a CAT node is the lastpos of the second argument,
+ 	   union that of the first argument if the second is nullable. */
+ 	if (nullable[-1])
+ 	  nlastpos[-2] += nlastpos[-1];
+ 	else
+ 	  {
+ 	    pos = lastpos + nlastpos[-2];
+ 	    for (j = nlastpos[-1] - 1; j >= 0; --j)
+ 	      pos[j] = lastpos[j];
+ 	    lastpos += nlastpos[-2];
+ 	    nlastpos[-2] = nlastpos[-1];
+ 	  }
+ 	--nlastpos;
+ 
+ 	/* A CAT node is nullable if both arguments are nullable. */
+ 	nullable[-2] = nullable[-1] && nullable[-2];
+ 	--nullable;
+ 	break;
+ 
+       case OR:
+       case ORTOP:
+ 	/* The firstpos is the union of the firstpos of each argument. */
+ 	nfirstpos[-2] += nfirstpos[-1];
+ 	--nfirstpos;
+ 
+ 	/* The lastpos is the union of the lastpos of each argument. */
+ 	nlastpos[-2] += nlastpos[-1];
+ 	--nlastpos;
+ 
+ 	/* An OR node is nullable if either argument is nullable. */
+ 	nullable[-2] = nullable[-1] || nullable[-2];
+ 	--nullable;
+ 	break;
+ 
+       default:
+ 	/* Anything else is a nonempty position.  (Note that special
+ 	   constructs like \< are treated as nonempty strings here;
+ 	   an "epsilon closure" effectively makes them nullable later.
+ 	   Backreferences have to get a real position so we can detect
+ 	   transitions on them later.  But they are nullable.) */
+ 	*nullable++ = d->tokens[i] == BACKREF;
+ 
+ 	/* This position is in its own firstpos and lastpos. */
+ 	*nfirstpos++ = *nlastpos++ = 1;
+ 	--firstpos, --lastpos;
+ 	firstpos->index = lastpos->index = i;
+ 	firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+ 
+ 	/* Allocate the follow set for this position. */
+ 	nalloc[i] = 1;
+ 	MALLOC(d->follows[i].elems, position, nalloc[i]);
+ 	break;
+       }
+ #ifdef DEBUG
+     /* ... balance the above nonsyntactic #ifdef goo... */
+       fprintf(stderr, "node %d:", i);
+       prtok(d->tokens[i]);
+       putc('\n', stderr);
+       fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+       fprintf(stderr, " firstpos:");
+       for (j = nfirstpos[-1] - 1; j >= 0; --j)
+ 	{
+ 	  fprintf(stderr, " %d:", firstpos[j].index);
+ 	  prtok(d->tokens[firstpos[j].index]);
+ 	}
+       fprintf(stderr, "\n lastpos:");
+       for (j = nlastpos[-1] - 1; j >= 0; --j)
+ 	{
+ 	  fprintf(stderr, " %d:", lastpos[j].index);
+ 	  prtok(d->tokens[lastpos[j].index]);
+ 	}
+       putc('\n', stderr);
+     }
+ #endif
+ 
+   /* For each follow set that is the follow set of a real position, replace
+      it with its epsilon closure. */
+   for (i = 0; i < d->tindex; ++i)
+     if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
+ 	|| d->tokens[i] >= CSET)
+       {
+ #ifdef DEBUG
+ 	fprintf(stderr, "follows(%d:", i);
+ 	prtok(d->tokens[i]);
+ 	fprintf(stderr, "):");
+ 	for (j = d->follows[i].nelem - 1; j >= 0; --j)
+ 	  {
+ 	    fprintf(stderr, " %d:", d->follows[i].elems[j].index);
+ 	    prtok(d->tokens[d->follows[i].elems[j].index]);
+ 	  }
+ 	putc('\n', stderr);
+ #endif
+ 	copy(&d->follows[i], &merged);
+ 	epsclosure(&merged, d);
+ 	if (d->follows[i].nelem < merged.nelem)
+ 	  REALLOC(d->follows[i].elems, position, merged.nelem);
+ 	copy(&merged, &d->follows[i]);
+       }
+ 
+   /* Get the epsilon closure of the firstpos of the regexp.  The result will
+      be the set of positions of state 0. */
+   merged.nelem = 0;
+   for (i = 0; i < nfirstpos[-1]; ++i)
+     insert(firstpos[i], &merged);
+   epsclosure(&merged, d);
+ 
+   /* Check if any of the positions of state 0 will want newline context. */
+   wants_newline = 0;
+   for (i = 0; i < merged.nelem; ++i)
+     if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
+       wants_newline = 1;
+ 
+   /* Build the initial state. */
+   d->salloc = 1;
+   d->sindex = 0;
+   MALLOC(d->states, dfa_state, d->salloc);
+   state_index(d, &merged, wants_newline, 0);
+ 
+   free(o_nullable);
+   free(o_nfirst);
+   free(o_firstpos);
+   free(o_nlast);
+   free(o_lastpos);
+   free(nalloc);
+   free(merged.elems);
+ }
+ 
+ /* Find, for each character, the transition out of state s of d, and store
+    it in the appropriate slot of trans.
+ 
+    We divide the positions of s into groups (positions can appear in more
+    than one group).  Each group is labeled with a set of characters that
+    every position in the group matches (taking into account, if necessary,
+    preceding context information of s).  For each group, find the union
+    of its elements' follows.  This set is the set of positions of the
+    new state.  For each character in the group's label, set the transition
+    on this character to be to a state corresponding to the set's positions,
+    and its associated backward context information, if necessary.
+ 
+    If we are building a searching matcher, we include the positions of state
+    0 in every state.
+ 
+    The collection of groups is constructed by building an equivalence-class
+    partition of the positions of s.
+ 
+    For each position, find the set of characters C that it matches.  Eliminate
+    any characters from C that fail on grounds of backward context.
+ 
+    Search through the groups, looking for a group whose label L has nonempty
+    intersection with C.  If L - C is nonempty, create a new group labeled
+    L - C and having the same positions as the current group, and set L to
+    the intersection of L and C.  Insert the position in this group, set
+    C = C - L, and resume scanning.
+ 
+    If after comparing with every group there are characters remaining in C,
+    create a new group labeled with the characters of C and insert this
+    position in that group. */
+ void
+ dfastate(s, d, trans)
+      int s;
+      struct dfa *d;
+      int trans[];
+ {
+   position_set grps[NOTCHAR];	/* As many as will ever be needed. */
+   charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
+   int ngrps = 0;		/* Number of groups actually used. */
+   position pos;			/* Current position being considered. */
+   charclass matches;		/* Set of matching characters. */
+   int matchesf;			/* True if matches is nonempty. */
+   charclass intersect;		/* Intersection with some label set. */
+   int intersectf;		/* True if intersect is nonempty. */
+   charclass leftovers;		/* Stuff in the label that didn't match. */
+   int leftoversf;		/* True if leftovers is nonempty. */
+   static charclass letters;	/* Set of characters considered letters. */
+   static charclass newline;	/* Set containing just the newline character. */
+   position_set follows;		/* Union of the follows of some group. */
+   position_set tmp;		/* Allocated and freed here, otherwise unused. */
+   int state;			/* New state. */
+   int wants_newline;		/* New state wants to know newline context. */
+   int state_newline;		/* New state on a newline transition. */
+   int wants_letter;		/* New state wants to know letter context. */
+   int state_letter;		/* New state on a letter transition. */
+   static initialized;		/* Flag for static initialization. */
+   int i, j, k;
+ 
+   /* Initialize the set of letters, if necessary. */
+   if (! initialized)
+     {
+       initialized = 1;
+       for (i = 0; i < NOTCHAR; ++i)
+ 	if (ISALNUM(i))
+ 	  setbit(i, letters);
+       setbit('\n', newline);
+     }
+ 
+   zeroset(matches);
+ 
+   for (i = 0; i < d->states[s].elems.nelem; ++i)
+     {
+       pos = d->states[s].elems.elems[i];
+       if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
+ 	setbit(d->tokens[pos.index], matches);
+       else if (d->tokens[pos.index] >= CSET)
+ 	copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
+       else
+ 	continue;
+ 
+       /* Some characters may need to be eliminated from matches because
+ 	 they fail in the current context. */
+       if (pos.constraint != 0xFF)	/* NOTE(review): presumably NO_CONSTRAINT. */
+ 	{
+ 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ 					 d->states[s].newline, 1))
+ 	    clrbit('\n', matches);
+ 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ 					 d->states[s].newline, 0))
+ 	    for (j = 0; j < CHARCLASS_INTS; ++j)
+ 	      matches[j] &= newline[j];
+ 	  if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+ 					d->states[s].letter, 1))
+ 	    for (j = 0; j < CHARCLASS_INTS; ++j)
+ 	      matches[j] &= ~letters[j];
+ 	  if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+ 					d->states[s].letter, 0))
+ 	    for (j = 0; j < CHARCLASS_INTS; ++j)
+ 	      matches[j] &= letters[j];
+ 
+ 	  /* If there are no characters left, there's no point in going on. */
+ 	  for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
+ 	    ;
+ 	  if (j == CHARCLASS_INTS)
+ 	    continue;
+ 	}
+ 
+       for (j = 0; j < ngrps; ++j)
+ 	{
+ 	  /* If matches contains a single character only, and the current
+ 	     group's label doesn't contain that character, go on to the
+ 	     next group. */
+ 	  if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
+ 	      && !tstbit(d->tokens[pos.index], labels[j]))
+ 	    continue;
+ 
+ 	  /* Check if this group's label has a nonempty intersection with
+ 	     matches. */
+ 	  intersectf = 0;
+ 	  for (k = 0; k < CHARCLASS_INTS; ++k)
+ 	    (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+ 	  if (! intersectf)
+ 	    continue;
+ 
+ 	  /* It does; now find the set differences both ways. */
+ 	  leftoversf = matchesf = 0;
+ 	  for (k = 0; k < CHARCLASS_INTS; ++k)
+ 	    {
+ 	      /* Even an optimizing compiler can't know this for sure. */
+ 	      int match = matches[k], label = labels[j][k];
+ 
+ 	      (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
+ 	      (matches[k] = match & ~label) ? matchesf = 1 : 0;
+ 	    }
+ 
+ 	  /* If there were leftovers, create a new group labeled with them. */
+ 	  if (leftoversf)
+ 	    {
+ 	      copyset(leftovers, labels[ngrps]);
+ 	      copyset(intersect, labels[j]);
+ 	      MALLOC(grps[ngrps].elems, position, d->nleaves);
+ 	      copy(&grps[j], &grps[ngrps]);
+ 	      ++ngrps;
+ 	    }
+ 
+ 	  /* Put the position in the current group.  Note that there is no
+ 	     reason to call insert() here. */
+ 	  grps[j].elems[grps[j].nelem++] = pos;
+ 
+ 	  /* If every character matching the current position has been
+ 	     accounted for, we're done. */
+ 	  if (! matchesf)
+ 	    break;
+ 	}
+ 
+       /* If we've passed the last group, and there are still characters
+ 	 unaccounted for, then we'll have to create a new group. */
+       if (j == ngrps)
+ 	{
+ 	  copyset(matches, labels[ngrps]);
+ 	  zeroset(matches);
+ 	  MALLOC(grps[ngrps].elems, position, d->nleaves);
+ 	  grps[ngrps].nelem = 1;
+ 	  grps[ngrps].elems[0] = pos;
+ 	  ++ngrps;
+ 	}
+     }
+ 
+   MALLOC(follows.elems, position, d->nleaves);
+   MALLOC(tmp.elems, position, d->nleaves);
+ 
+   /* If we are a searching matcher, the default transition is to a state
+      containing the positions of state 0, otherwise the default transition
+      is to fail miserably. */
+   if (d->searchflag)
+     {
+       wants_newline = 0;
+       wants_letter = 0;
+       for (i = 0; i < d->states[0].elems.nelem; ++i)
+ 	{
+ 	  if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
+ 	    wants_newline = 1;
+ 	  if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
+ 	    wants_letter = 1;
+ 	}
+       copy(&d->states[0].elems, &follows);
+       state = state_index(d, &follows, 0, 0);
+       if (wants_newline)
+ 	state_newline = state_index(d, &follows, 1, 0);
+       else
+ 	state_newline = state;
+       if (wants_letter)
+ 	state_letter = state_index(d, &follows, 0, 1);
+       else
+ 	state_letter = state;
+       for (i = 0; i < NOTCHAR; ++i)
+ 	if (i == '\n')
+ 	  trans[i] = state_newline;
+ 	else if (ISALNUM(i))
+ 	  trans[i] = state_letter;
+ 	else
+ 	  trans[i] = state;
+     }
+   else
+     for (i = 0; i < NOTCHAR; ++i)
+       trans[i] = -1;
+ 
+   for (i = 0; i < ngrps; ++i)
+     {
+       follows.nelem = 0;
+ 
+       /* Find the union of the follows of the positions of the group.
+ 	 This is a hideously inefficient loop.  Fix it someday. */
+       for (j = 0; j < grps[i].nelem; ++j)
+ 	for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
+ 	  insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
+ 
+       /* If we are building a searching matcher, throw in the positions
+ 	 of state 0 as well. */
+       if (d->searchflag)
+ 	for (j = 0; j < d->states[0].elems.nelem; ++j)
+ 	  insert(d->states[0].elems.elems[j], &follows);
+ 
+       /* Find out if the new state will want any context information. */
+       wants_newline = 0;
+       if (tstbit('\n', labels[i]))
+ 	for (j = 0; j < follows.nelem; ++j)
+ 	  if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
+ 	    wants_newline = 1;
+ 
+       wants_letter = 0;
+       for (j = 0; j < CHARCLASS_INTS; ++j)
+ 	if (labels[i][j] & letters[j])
+ 	  break;
+       if (j < CHARCLASS_INTS)
+ 	for (j = 0; j < follows.nelem; ++j)
+ 	  if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
+ 	    wants_letter = 1;
+ 
+       /* Find the state(s) corresponding to the union of the follows. */
+       state = state_index(d, &follows, 0, 0);
+       if (wants_newline)
+ 	state_newline = state_index(d, &follows, 1, 0);
+       else
+ 	state_newline = state;
+       if (wants_letter)
+ 	state_letter = state_index(d, &follows, 0, 1);
+       else
+ 	state_letter = state;
+ 
+       /* Set the transitions for each character in the current label. */
+       for (j = 0; j < CHARCLASS_INTS; ++j)
+ 	for (k = 0; k < INTBITS; ++k)
+ 	  if (labels[i][j] & 1 << k)
+ 	    {
+ 	      int c = j * INTBITS + k;
+ 
+ 	      if (c == '\n')
+ 		trans[c] = state_newline;
+ 	      else if (ISALNUM(c))
+ 		trans[c] = state_letter;
+ 	      else if (c < NOTCHAR)
+ 		trans[c] = state;
+ 	    }
+     }
+ 
+   for (i = 0; i < ngrps; ++i)
+     free(grps[i].elems);
+   free(follows.elems);
+   free(tmp.elems);
+ }
+ 
+ /* Some routines for manipulating a compiled dfa's transition tables.
+    Each state may or may not have a transition table; if it does, and it
+    is a non-accepting state, then d->trans[state] points to its table.
+    If it is an accepting state then d->fails[state] points to its table.
+    If it has no table at all, then d->trans[state] is NULL.
+    TODO: Improve this comment, get rid of the unnecessary redundancy. */
+ 
+ static void
+ build_state(s, d)
+      int s;
+      struct dfa *d;
+ {
+   int *trans;			/* The new transition table. */
+   int i;
+ 
+   /* Set an upper limit on the number of transition tables that will ever
+      exist at once.  1024 is arbitrary.  The idea is that the frequently
+      used transition tables will be quickly rebuilt, whereas the ones that
+      were only needed once or twice will be cleared away. */
+   if (d->trcount >= 1024)
+     {
+       for (i = 0; i < d->tralloc; ++i)
+ 	if (d->trans[i])
+ 	  {
+ 	    free((ptr_t) d->trans[i]);
+ 	    d->trans[i] = NULL;
+ 	  }
+ 	else if (d->fails[i])
+ 	  {
+ 	    free((ptr_t) d->fails[i]);
+ 	    d->fails[i] = NULL;
+ 	  }
+       d->trcount = 0;
+     }
+ 
+   ++d->trcount;
+ 
+   /* Set up the success bits for this state (4, 2, 1 as in dfaexec's sbit). */
+   d->success[s] = 0;
+   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
+       s, *d))
+     d->success[s] |= 4;
+   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
+       s, *d))
+     d->success[s] |= 2;
+   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
+       s, *d))
+     d->success[s] |= 1;
+ 
+   MALLOC(trans, int, NOTCHAR);
+   dfastate(s, d, trans);
+ 
+   /* Now go through the new transition table, and make sure that the trans
+      and fail arrays are allocated large enough to hold a pointer for the
+      largest state mentioned in the table. */
+   for (i = 0; i < NOTCHAR; ++i)
+     if (trans[i] >= d->tralloc)
+       {
+ 	int oldalloc = d->tralloc;
+ 
+ 	while (trans[i] >= d->tralloc)
+ 	  d->tralloc *= 2;
+ 	REALLOC(d->realtrans, int *, d->tralloc + 1);
+ 	d->trans = d->realtrans + 1;	/* Re-derive after the realloc. */
+ 	REALLOC(d->fails, int *, d->tralloc);
+ 	REALLOC(d->success, int, d->tralloc);
+ 	REALLOC(d->newlines, int, d->tralloc);
+ 	while (oldalloc < d->tralloc)	/* Mark the new slots as table-less. */
+ 	  {
+ 	    d->trans[oldalloc] = NULL;
+ 	    d->fails[oldalloc++] = NULL;
+ 	  }
+       }
+ 
+   /* Keep the newline transition in a special place so we can use it as
+      a sentinel. */
+   d->newlines[s] = trans['\n'];
+   trans['\n'] = -1;
+ 
+   if (ACCEPTING(s, *d))
+     d->fails[s] = trans;
+   else
+     d->trans[s] = trans;
+ }
+ 
+ static void
+ build_state_zero(d)
+      struct dfa *d;
+ {
+   d->trcount = 0;		/* No transition tables built so far. */
+   d->tralloc = 1;		/* Table arrays start with one slot each. */
+   MALLOC(d->newlines, int, d->tralloc);
+   MALLOC(d->success, int, d->tralloc);
+   CALLOC(d->fails, int *, d->tralloc);
+   CALLOC(d->realtrans, int *, d->tralloc + 1);
+   d->trans = d->realtrans + 1;	/* So trans[-1] is realtrans[0]. */
+   build_state(0, d);
+ }
+ 
+ /* Search through a buffer looking for a match to the given struct dfa.
+    Find the first occurrence of a string matching the regexp in the buffer,
+    and the shortest possible version thereof.  Return a pointer to the first
+    character after the match, or NULL if none is found.  Begin points to
+    the beginning of the buffer, and end points to the first character after
+    its end.  We store a newline in *end to act as a sentinel, so end had
+    better point somewhere valid.  Newline is a flag indicating whether to
+    allow newlines to be in the matching string.  If count is non-
+    NULL it points to a place we're supposed to increment every time we
+    see a newline.  Finally, if backref is non-NULL it points to a place
+    where we're supposed to store a 1 if backreferencing happened and the
+    match needs to be verified by a backtracking matcher.  Otherwise
+    we store a 0 in *backref. */
+ char *
+ dfaexec(d, begin, end, newline, count, backref)
+      struct dfa *d;
+      char *begin;
+      char *end;
+      int newline;
+      int *count;
+      int *backref;
+ {
+   register s, s1, tmp;		/* Current state. */
+   register unsigned char *p;	/* Current input character. */
+   register **trans, *t;		/* Copy of d->trans so it can be optimized
+ 				   into a register. */
+   static sbit[NOTCHAR];	/* Table for anding with d->success. */
+   static sbit_init;
+ 
+   if (! sbit_init)
+     {
+       int i;
+ 
+       sbit_init = 1;
+       for (i = 0; i < NOTCHAR; ++i)
+ 	if (i == '\n')
+ 	  sbit[i] = 4;
+ 	else if (ISALNUM(i))
+ 	  sbit[i] = 2;
+ 	else
+ 	  sbit[i] = 1;
+     }
+ 
+   if (! d->tralloc)
+     build_state_zero(d);
+ 
+   s = s1 = 0;
+   p = (unsigned char *) begin;
+   trans = d->trans;
+   *end = '\n';			/* Sentinel; overwrites the byte at END. */
+ 
+   for (;;)
+     {
+       /* The dreaded inner loop. */
+       if ((t = trans[s]) != 0)
+ 	do
+ 	  {
+ 	    s1 = t[*p++];
+ 	    if (! (t = trans[s1]))
+ 	      goto last_was_s;
+ 	    s = t[*p++];
+ 	  }
+         while ((t = trans[s]) != 0);
+       goto last_was_s1;
+     last_was_s:
+       tmp = s, s = s1, s1 = tmp;	/* Make s the state without a table. */
+     last_was_s1:
+ 
+       if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
+ 	{
+ 	  if (d->success[s] & sbit[*p])
+ 	    {
+ 	      if (backref)
+ 		if (d->states[s].backref)
+ 		  *backref = 1;
+ 		else
+ 		  *backref = 0;
+ 	      return (char *) p;
+ 	    }
+ 
+ 	  s1 = s;
+ 	  s = d->fails[s][*p++];
+ 	  continue;
+ 	}
+ 
+       /* If the previous character was a newline, count it. */
+       if (count && (char *) p <= end && p[-1] == '\n')
+ 	++*count;
+ 
+       /* Check if we've run off the end of the buffer. */
+       if ((char *) p > end)
+ 	return NULL;
+ 
+       if (s >= 0)
+ 	{
+ 	  build_state(s, d);
+ 	  trans = d->trans;	/* build_state may have moved the array. */
+ 	  continue;
+ 	}
+ 
+       if (p[-1] == '\n' && newline)
+ 	{
+ 	  s = d->newlines[s1];
+ 	  continue;
+ 	}
+ 
+       s = 0;			/* s is negative here: restart at state 0. */
+     }
+ }
+ 
+ /* Initialize the components of a dfa that the other routines don't
+    initialize for themselves. */
+ void
+ dfainit(d)
+      struct dfa *d;
+ {
+   d->calloc = 1;		/* Scanner: room for one charclass. */
+   MALLOC(d->charclasses, charclass, d->calloc);
+   d->cindex = 0;
+   d->talloc = 1;		/* Parser: room for one token. */
+   MALLOC(d->tokens, token, d->talloc);
+   d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+ 
+   d->searchflag = 0;
+   d->tralloc = 0;		/* Executor tables are built lazily by dfaexec(). */
+   d->realtrans = d->fails = 0;	/* Null them so dfafree() on a dfa that was */
+   d->success = d->newlines = 0;	/* never executed does not free junk. */
+   d->musts = 0;
+ }
+ 
+ /* Parse and analyze a single string of the given length. */
+ void
+ dfacomp(s, len, d, searchflag)
+      char *s;
+      size_t len;
+      struct dfa *d;
+      int searchflag;
+ {
+   if (case_fold)	/* dummy folding in service of dfamust() */
+     {
+       char *lcopy, *p;
+       int i;
+ 
+       p = lcopy = malloc(len + 7);	/* +7: room for "^.*\(" and "\)". */
+       if (!lcopy)
+ 	dfaerror("out of memory");	/* NOTE(review): must not return; lcopy is written below. */
+       
+       /* This is a kludge. */
+       case_fold = 0;
+       if (current_mbctype != MBCTYPE_ASCII && searchflag) {
+ 	*p++ = '^';
+ 	*p++ = '.';
+ 	*p++ = '*';
+ 	if (!(syntax_bits & RE_NO_BK_PARENS))
+ 	  *p++ = '\\';
+ 	*p++ = '(';
+       }
+       for (i = 0; i < len; ++i)
+ 	if (ISUPPER(s[i]))
+ 	  *p++ = tolower((unsigned char)s[i]);
+ 	else
+ 	  if (ismbchar(*p++ = s[i]) && ++i < len)
+ 	    *p++ = s[i];	/* Copy the trail byte unfolded. */
+       if (current_mbctype != MBCTYPE_ASCII && searchflag) {
+ 	if (!(syntax_bits & RE_NO_BK_PARENS))
+ 	  *p++ = '\\';
+ 	*p++ = ')';
+       }
+ 
+       dfainit(d);
+       dfaparse(lcopy, p - lcopy, d);	/* Parse the lowercased copy ... */
+       dfamust(d);			/* ... only to extract must-strings. */
+       d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+       case_fold = 1;
+       if (current_mbctype != MBCTYPE_ASCII && searchflag) {
+ 	bcopy(s, lcopy + (syntax_bits & RE_NO_BK_PARENS ? 4 : 5), len);
+ 	dfaparse(lcopy, p - lcopy, d);	/* Original text, same wrappers. */
+ 	free(lcopy);
+       }
+       else {
+ 	free(lcopy);
+ 	dfaparse(s, len, d);
+       }
+       dfaanalyze(d, searchflag);
+     }
+   else
+     {
+         dfainit(d);
+ 	if (current_mbctype != MBCTYPE_ASCII && searchflag) {
+ 	  char *lcopy = malloc(len + 7), *p = lcopy;
+ 	  if (!lcopy)
+ 	    dfaerror("out of memory");	/* Was unchecked before. */
+ 	  *p++ = '^';
+ 	  *p++ = '.';
+ 	  *p++ = '*';
+ 	  if (!(syntax_bits & RE_NO_BK_PARENS))
+ 	    *p++ = '\\';
+ 	  *p++ = '(';
+ 	  bcopy(s, p, len);
+ 	  p += len;
+ 	  if (!(syntax_bits & RE_NO_BK_PARENS))
+ 	    *p++ = '\\';
+ 	  *p++ = ')';
+ 	  dfaparse(lcopy, p - lcopy, d);
+ 	  free(lcopy);
+ 	}
+ 	else
+ 	  dfaparse(s, len, d);
+ 	dfamust(d);
+         dfaanalyze(d, searchflag);
+     }
+ }
+ 
+ /* Free the storage held by the components of a dfa. */
+ void
+ dfafree(d)
+      struct dfa *d;
+ {
+   int i;
+   struct dfamust *dm, *ndm;
+   free((ptr_t) d->charclasses);
+   free((ptr_t) d->tokens);
+   for (i = 0; i < d->sindex; ++i)
+     free((ptr_t) d->states[i].elems.elems);
+   free((ptr_t) d->states);
+   for (i = 0; i < d->tindex; ++i)
+     if (d->follows[i].elems)
+       free((ptr_t) d->follows[i].elems);
+   free((ptr_t) d->follows);
+   for (i = 0; i < d->tralloc; ++i)
+     if (d->trans[i])
+       free((ptr_t) d->trans[i]);
+     else if (d->fails[i])
+       free((ptr_t) d->fails[i]);
+   if (d->tralloc) {		/* Allocated only once dfaexec() has run. */
+     free((ptr_t) d->realtrans);  free((ptr_t) d->fails);
+     free((ptr_t) d->newlines);   free((ptr_t) d->success);
+   }
+   for (dm = d->musts; dm; dm = ndm)
+     {
+       ndm = dm->next;		/* Save the link before freeing the node. */
+       free(dm->must);
+       free((ptr_t) dm);
+     }
+ }
+ 
+ /* Having found the postfix representation of the regular expression,
+    try to find a long sequence of characters that must appear in any line
+    containing the r.e.
+    Finding a "longest" sequence is beyond the scope here;
+    we take an easy way out and hope for the best.
+    (Take "(ab|a)b"--please.)
+ 
+    We do a bottom-up calculation of sequences of characters that must appear
+    in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+    representation:
+ 	sequences that must appear at the left of the match ("left")
+ 	sequences that must appear at the right of the match ("right")
+ 	lists of sequences that must appear somewhere in the match ("in")
+ 	sequences that must constitute the match ("is")
+ 
+    When we get to the root of the tree, we use one of the longest of its
+    calculated "in" sequences as our answer.  The sequence we find is copied into
+    a struct dfamust pushed onto d->musts (where "d" is the single argument
+    passed to "dfamust"); its "exact" flag is set if it equals the root's "is".
+ 
+    The sequences calculated for the various types of node (in pseudo ANSI c)
+    are shown below.  "p" is the operand of unary operators (and the left-hand
+    operand of binary operators); "q" is the right-hand operand of binary
+    operators.
+ 
+    "ZERO" means "a zero-length sequence" below.
+ 
+ 	Type	left		right		is		in
+ 	----	----		-----		--		--
+ 	char c	# c		# c		# c		# c
+ 	
+ 	CSET	ZERO		ZERO		ZERO		ZERO
+ 	
+ 	STAR	ZERO		ZERO		ZERO		ZERO
+ 
+ 	QMARK	ZERO		ZERO		ZERO		ZERO
+ 
+ 	PLUS	p->left		p->right	ZERO		p->in
+ 
+ 	CAT	(p->is==ZERO)?	(q->is==ZERO)?	(p->is!=ZERO &&	p->in plus
+ 		p->left :	q->right :	q->is!=ZERO) ?	q->in plus
+ 		p->is##q->left	p->right##q->is	p->is##q->is :	p->right##q->left
+ 						ZERO
+ 					
+ 	OR	longest common	longest common	(do p->is and	substrings common to
+ 		leading		trailing	q->is have same	p->in and q->in
+ 		(sub)sequence	(sub)sequence	length and	
+ 		of p->left	of p->right	content) ?	
+ 		and q->left	and q->right	p->is : NULL	
+ 
+    If there's anything else we recognize in the tree, all four sequences get set
+    to zero-length sequences.  If there's something we don't recognize in the tree,
+    we just return a zero-length sequence.
+ 
+    Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+    'aaa')?
+ 
+    And. . .is it here or someplace that we might ponder "optimizations" such as
+ 	egrep 'psi|epsilon'	->	egrep 'psi'
+ 	egrep 'pepsi|epsilon'	->	egrep 'epsi'
+ 					(Yes, we now find "epsi" as a "string
+ 					that must occur", but we might also
+ 					simplify the *entire* r.e. being sought)
+ 	grep '[c]'		->	grep 'c'
+ 	grep '(ab|a)b'		->	grep 'ab'
+ 	grep 'ab*'		->	grep 'a'
+ 	grep 'a*b'		->	grep 'b'
+ 
+    There are several issues:
+ 
+    Is optimization easy (enough)?
+ 
+    Does optimization actually accomplish anything,
+    or is the automaton you get from "psi|epsilon" (for example)
+    the same as the one you get from "psi" (for example)?
+   
+    Are optimizable r.e.'s likely to be used in real-life situations
+    (something like 'ab*' is probably unlikely; something like
+    'psi|epsilon' is likelier)? */
+ 
+ static char *
+ icatalloc(old, new)
+      char *old;
+      char *new;
+ {				/* Append NEW to heap string OLD; NULL on failure. */
+   char *result;
+   size_t oldsize, newsize;
+ 
+   newsize = (new == NULL) ? 0 : strlen(new);
+   if (old == NULL)
+     oldsize = 0;
+   else if (newsize == 0)
+     return old;			/* Nothing to append: hand OLD back untouched. */
+   else	oldsize = strlen(old);
+   if (old == NULL)
+     result = (char *) malloc(newsize + 1);	/* Fresh copy of NEW. */
+   else
+     result = (char *) realloc((void *) old, oldsize + newsize + 1);
+   if (result != NULL && new != NULL)	/* NOTE: if both args are NULL the byte stays unset. */
+     (void) strcpy(result + oldsize, new);
+   return result;
+ }
+ 
+ static char *
+ icpyalloc(string)
+      char *string;
+ {				/* Return a malloc'ed copy of STRING, or NULL. */
+   return icatalloc((char *) NULL, string);
+ }
+ 
+ static char *
+ istrstr(lookin, lookfor)
+      char *lookin;
+      char *lookfor;
+ {				/* First occurrence of LOOKFOR in LOOKIN, or NULL. */
+   char *cp;
+   size_t len;
+ 
+   len = strlen(lookfor);
+   for (cp = lookin; *cp != '\0'; ++cp)	/* An empty LOOKIN never matches. */
+     if (strncmp(cp, lookfor, len) == 0)
+       return cp;
+   return NULL;
+ }
+ 
+ static void
+ ifree(cp)
+      char *cp;
+ {				/* free() that ignores a NULL pointer. */
+   if (cp != NULL)
+     free(cp);
+ }
+ 
+ static void
+ freelist(cpp)
+      char **cpp;
+ {				/* Free every string in NULL-terminated list CPP. */
+   int i;
+ 
+   if (cpp == NULL)
+     return;
+   for (i = 0; cpp[i] != NULL; ++i)
+     {
+       free(cpp[i]);
+       cpp[i] = NULL;		/* The array itself is kept, left empty. */
+     }
+ }
+ 
+ static char **
+ enlist(cpp, new, len)
+      char **cpp;
+      char *new;
+      size_t len;
+ {				/* Add the first LEN bytes of NEW to list CPP. */
+   int i, j;
+ 
+   if (cpp == NULL)
+     return NULL;
+   if ((new = icpyalloc(new)) == NULL)	/* Work on a private copy from here on. */
+     {
+       freelist(cpp);
+       return NULL;
+     }
+   new[len] = '\0';		/* Truncate the copy to LEN bytes. */
+   /* Is there already something in the list that's new (or longer)? */
+   for (i = 0; cpp[i] != NULL; ++i)
+     if (istrstr(cpp[i], new) != NULL)
+       {
+ 	free(new);
+ 	return cpp;
+       }
+   /* Eliminate any obsoleted strings. */
+   j = 0;			/* Here i is the number of list entries. */
+   while (cpp[j] != NULL)
+     if (istrstr(new, cpp[j]) == NULL)
+       ++j;
+     else
+       {
+ 	free(cpp[j]);
+ 	if (--i == j)		/* Removed the last entry: slot j is reused below. */
+ 	  break;
+ 	cpp[j] = cpp[i];	/* Fill the hole with the last entry. */
+ 	cpp[i] = NULL;
+       }
+   /* Add the new string. */
+   cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
+   if (cpp == NULL)
+     return NULL;
+   cpp[i] = new;
+   cpp[i + 1] = NULL;
+   return cpp;
+ }
+ 
+ /* Given pointers to two strings, return a pointer to an allocated
+    list of their distinct common substrings. Return NULL if something
+    seems wild. */
+ static char **
+ comsubs(left, right)
+      char *left;
+      char *right;
+ {
+   char **cpp;
+   char *lcp;
+   char *rcp;
+   size_t i, len;
+ 
+   if (left == NULL || right == NULL)
+     return NULL;
+   cpp = (char **) malloc(sizeof *cpp);
+   if (cpp == NULL)
+     return NULL;
+   cpp[0] = NULL;		/* Start with an empty list. */
+   for (lcp = left; *lcp != '\0'; ++lcp)
+     {
+       len = 0;			/* Longest run in RIGHT matching text at lcp. */
+       rcp = index(right, *lcp);
+       while (rcp != NULL)
+ 	{
+ 	  for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
+ 	    ;
+ 	  if (i > len)
+ 	    len = i;
+ 	  rcp = index(rcp + 1, *lcp);	/* Try the next occurrence in RIGHT. */
+ 	}
+       if (len == 0)
+ 	continue;
+       if ((cpp = enlist(cpp, lcp, len)) == NULL)
+ 	break;
+     }
+   return cpp;
+ }
+ 
+ static char **
+ addlists(old, new)
+ char **old;
+ char **new;
+ {				/* enlist() every string of NEW into OLD. */
+   int i;
+ 
+   if (old == NULL || new == NULL)
+     return NULL;
+   for (i = 0; new[i] != NULL; ++i)
+     {
+       old = enlist(old, new[i], strlen(new[i]));
+       if (old == NULL)		/* enlist() failed; give up with NULL. */
+ 	break;
+     }
+   return old;
+ }
+ 
+ /* Given two lists of substrings, return a new list giving substrings
+    common to both. */
+ static char **
+ inboth(left, right)
+      char **left;
+      char **right;
+ {
+   char **both;
+   char **temp;
+   int lnum, rnum;
+ 
+   if (left == NULL || right == NULL)
+     return NULL;
+   both = (char **) malloc(sizeof *both);
+   if (both == NULL)
+     return NULL;
+   both[0] = NULL;		/* Start with an empty result list. */
+   for (lnum = 0; left[lnum] != NULL; ++lnum)
+     {
+       for (rnum = 0; right[rnum] != NULL; ++rnum)
+ 	{
+ 	  temp = comsubs(left[lnum], right[rnum]);
+ 	  if (temp == NULL)
+ 	    {
+ 	      freelist(both);
+ 	      return NULL;
+ 	    }
+ 	  both = addlists(both, temp);
+ 	  freelist(temp);	/* NOTE(review): frees the strings, not temp itself. */
+ 	  if (both == NULL)
+ 	    return NULL;
+ 	}
+     }
+   return both;
+ }
+ 
+ typedef struct
+ {
+   char **in;			/* NULL-terminated list: must appear somewhere. */
+   char *left;			/* Must appear at the left of the match. */
+   char *right;			/* Must appear at the right of the match. */
+   char *is;			/* Must constitute the whole match. */
+ } must;
+ 
+ static void
+ resetmust(mp)
+ must *mp;
+ {				/* Make all four sequences of *MP zero-length. */
+   mp->left[0] = mp->right[0] = mp->is[0] = '\0';
+   freelist(mp->in);
+ }
+ 
+ static void
+ dfamust(dfa)
+ struct dfa *dfa;
+ {				/* Push the longest must-string, if any, onto dfa->musts. */
+   must *musts;
+   must *mp;
+   char *result;
+   int ri;
+   int i;
+   int exact;
+   token t;
+   static must must0;
+   struct dfamust *dm;
+   static char empty_string[] = "";
+ 
+   result = empty_string;
+   exact = 0;
+   musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
+   if (musts == NULL)
+     return;
+   mp = musts;
+   for (i = 0; i <= dfa->tindex; ++i)
+     mp[i] = must0;		/* All-NULL, so cleanup at done: is always safe. */
+   for (i = 0; i <= dfa->tindex; ++i)
+     {
+       mp[i].in = (char **) malloc(sizeof *mp[i].in);
+       if (mp[i].in != NULL)
+ 	mp[i].in[0] = NULL;	/* Terminate first: done: walks this list. */
+       mp[i].left = malloc(2);
+       mp[i].right = malloc(2);
+       mp[i].is = malloc(2);
+       if (!mp[i].in || !mp[i].left || !mp[i].right || !mp[i].is)
+ 	goto done;
+       mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
+     }
+ #ifdef DEBUG
+   fprintf(stderr, "dfamust:\n");
+   for (i = 0; i < dfa->tindex; ++i)
+     {
+       fprintf(stderr, " %d:", i);
+       prtok(dfa->tokens[i]);
+     }
+   putc('\n', stderr);
+ #endif
+   for (ri = 0; ri < dfa->tindex; ++ri)
+     {
+       switch (t = dfa->tokens[ri])
+ 	{
+ 	case LPAREN:
+ 	case RPAREN:
+ 	  goto done;		/* "cannot happen" */
+ 	case EMPTY:
+ 	case BEGLINE:
+ 	case ENDLINE:
+ 	case BEGWORD:
+ 	case ENDWORD:
+ 	case LIMWORD:
+ 	case NOTLIMWORD:
+ 	case BACKREF:
+ 	  resetmust(mp);
+ 	  break;
+ 	case STAR:
+ 	case QMARK:
+ 	  if (mp <= musts)
+ 	    goto done;		/* "cannot happen" */
+ 	  --mp;
+ 	  resetmust(mp);
+ 	  break;
+ 	case OR:
+ 	case ORTOP:
+ 	  if (mp < &musts[2])
+ 	    goto done;		/* "cannot happen" */
+ 	  {
+ 	    char **new;
+ 	    must *lmp;
+ 	    must *rmp;
+ 	    int j, ln, rn, n;
+ 
+ 	    rmp = --mp;
+ 	    lmp = --mp;
+ 	    /* Guaranteed to be.  Unlikely, but. . . */
+ 	    if (strcmp(lmp->is, rmp->is) != 0)
+ 	      lmp->is[0] = '\0';
+ 	    /* Left side--easy */
+ 	    i = 0;
+ 	    while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
+ 	      ++i;
+ 	    lmp->left[i] = '\0';
+ 	    /* Right side */
+ 	    ln = strlen(lmp->right);
+ 	    rn = strlen(rmp->right);
+ 	    n = ln;
+ 	    if (n > rn)
+ 	      n = rn;
+ 	    for (i = 0; i < n; ++i)
+ 	      if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
+ 		break;
+ 	    for (j = 0; j < i; ++j)
+ 	      lmp->right[j] = lmp->right[(ln - i) + j];
+ 	    lmp->right[j] = '\0';
+ 	    new = inboth(lmp->in, rmp->in);
+ 	    if (new == NULL)
+ 	      goto done;
+ 	    freelist(lmp->in);
+ 	    free((char *) lmp->in);
+ 	    lmp->in = new;
+ 	  }
+ 	  break;
+ 	case PLUS:
+ 	  if (mp <= musts)
+ 	    goto done;		/* "cannot happen" */
+ 	  --mp;
+ 	  mp->is[0] = '\0';
+ 	  break;
+ 	case END:
+ 	  if (mp != &musts[1])
+ 	    goto done;		/* "cannot happen" */
+ 	  for (i = 0; musts[0].in[i] != NULL; ++i)
+ 	    if (strlen(musts[0].in[i]) > strlen(result))
+ 	      result = musts[0].in[i];
+ 	  if (strcmp(result, musts[0].is) == 0)
+ 	    exact = 1;
+ 	  goto done;
+ 	case CAT:
+ 	  if (mp < &musts[2])
+ 	    goto done;		/* "cannot happen" */
+ 	  {
+ 	    must *lmp;
+ 	    must *rmp;
+ 
+ 	    rmp = --mp;
+ 	    lmp = --mp;
+ 	    /* In.  Everything in left, plus everything in
+ 	       right, plus catenation of
+ 	       left's right and right's left. */
+ 	    lmp->in = addlists(lmp->in, rmp->in);
+ 	    if (lmp->in == NULL)
+ 	      goto done;
+ 	    if (lmp->right[0] != '\0' &&
+ 		rmp->left[0] != '\0')
+ 	      {
+ 		char *tp;
+ 
+ 		tp = icpyalloc(lmp->right);
+ 		if (tp == NULL)
+ 		  goto done;
+ 		tp = icatalloc(tp, rmp->left);
+ 		if (tp == NULL)
+ 		  goto done;
+ 		lmp->in = enlist(lmp->in, tp,
+ 				 strlen(tp));
+ 		free(tp);
+ 		if (lmp->in == NULL)
+ 		  goto done;
+ 	      }
+ 	    /* Left-hand */
+ 	    if (lmp->is[0] != '\0')
+ 	      {
+ 		lmp->left = icatalloc(lmp->left,
+ 				      rmp->left);
+ 		if (lmp->left == NULL)
+ 		  goto done;
+ 	      }
+ 	    /* Right-hand */
+ 	    if (rmp->is[0] == '\0')
+ 	      lmp->right[0] = '\0';
+ 	    lmp->right = icatalloc(lmp->right, rmp->right);
+ 	    if (lmp->right == NULL)
+ 	      goto done;
+ 	    /* Guaranteed to be */
+ 	    if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+ 	      {
+ 		lmp->is = icatalloc(lmp->is, rmp->is);
+ 		if (lmp->is == NULL)
+ 		  goto done;
+ 	      }
+ 	    else
+ 	      lmp->is[0] = '\0';
+ 	  }
+ 	  break;
+ 	default:
+ 	  if (t < END)
+ 	    {
+ 	      /* "cannot happen" */
+ 	      goto done;
+ 	    }
+ 	  else if (t == '\0')
+ 	    {
+ 	      /* not on *my* shift */
+ 	      goto done;
+ 	    }
+ 	  else if (t >= CSET)
+ 	    {
+ 	      /* easy enough */
+ 	      resetmust(mp);
+ 	    }
+ 	  else
+ 	    {
+ 	      /* plain character */
+ 	      resetmust(mp);
+ 	      mp->is[0] = mp->left[0] = mp->right[0] = t;
+ 	      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+ 	      mp->in = enlist(mp->in, mp->is, (size_t)1);
+ 	      if (mp->in == NULL)
+ 		goto done;
+ 	    }
+ 	  break;
+ 	}
+ #ifdef DEBUG
+       fprintf(stderr, " node: %d:", ri);
+       prtok(dfa->tokens[ri]);
+       fprintf(stderr, "\n  in:");
+       for (i = 0; mp->in[i]; ++i)
+ 	fprintf(stderr, " \"%s\"", mp->in[i]);
+       fprintf(stderr, "\n  is: \"%s\"\n", mp->is);
+       fprintf(stderr, "  left: \"%s\"\n", mp->left);
+       fprintf(stderr, "  right: \"%s\"\n", mp->right);
+ #endif
+       ++mp;
+     }
+  done:
+   dm = strlen(result) ? (struct dfamust *) malloc(sizeof *dm) : NULL;
+   if (dm != NULL && (dm->must = icpyalloc(result)) != NULL)
+     {
+       dm->exact = exact;
+       dm->next = dfa->musts;
+       dfa->musts = dm;
+     }
+   else
+     ifree((char *) dm);	/* No result, or no memory: record nothing. */
+   mp = musts;
+   for (i = 0; i <= dfa->tindex; ++i)
+     {
+       freelist(mp[i].in);
+       ifree((char *) mp[i].in);
+       ifree(mp[i].left);
+       ifree(mp[i].right);
+       ifree(mp[i].is);
+     }
+   free((char *) mp);
+ }
diff -crP php-2.0.1/src/jp.regex/dfa.h php-2.0.1.jp_urat-5.3/src/jp.regex/dfa.h
*** php-2.0.1/src/jp.regex/dfa.h	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/dfa.h	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,360 ----
+ /* dfa.h - declarations for GNU deterministic regexp compiler
+    Copyright (C) 1988 Free Software Foundation, Inc.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+ 
+ /* Written June, 1988 by Mike Haertel */
+ 
+ /* FIXME:
+    2.  We should not export so much of the DFA internals.
+    In addition to clobbering modularity, we eat up valuable
+    name space. */
+ 
+ /* Number of bits in an unsigned char. */
+ #define CHARBITS 8
+ 
+ /* First integer value that is greater than any character code (256 here). */
+ #define NOTCHAR (1 << CHARBITS)
+ 
+ /* INTBITS need not be exact, just a lower bound. */
+ #define INTBITS (CHARBITS * sizeof (int))
+ 
+ /* Number of ints required to hold a bit for every character, rounded up. */
+ #define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
+ 
+ /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
+ typedef int charclass[CHARCLASS_INTS];
+ 
+ /* The regexp is parsed into an array of tokens in postfix form.  Some tokens
+    are operators and others are terminal symbols.  Most (but not all) of these
+    codes are returned by the lexical analyzer. */
+ 
+ typedef enum
+ {
+   END = -1,			/* END is a terminal symbol that matches the
+ 				   end of input; any value of END or less in
+ 				   the parse tree is such a symbol.  Accepting
+ 				   states of the DFA are those that would have
+ 				   a transition on END. */
+ 
+   /* Ordinary character values are terminal symbols that match themselves. */
+ 
+   EMPTY = NOTCHAR,		/* EMPTY is a terminal symbol that matches
+ 				   the empty string. */
+ 
+   BACKREF,			/* BACKREF is generated by \<digit>; it
+ 				   is not completely handled.  If the scanner
+ 				   detects a transition on backref, it returns
+ 				   a kind of "semi-success" indicating that
+ 				   the match will have to be verified with
+ 				   a backtracking matcher. */
+ 
+   BEGLINE,			/* BEGLINE is a terminal symbol that matches
+ 				   the empty string if it is at the beginning
+ 				   of a line. */
+ 
+   ENDLINE,			/* ENDLINE is a terminal symbol that matches
+ 				   the empty string if it is at the end of
+ 				   a line. */
+ 
+   BEGWORD,			/* BEGWORD is a terminal symbol that matches
+ 				   the empty string if it is at the beginning
+ 				   of a word. */
+ 
+   ENDWORD,			/* ENDWORD is a terminal symbol that matches
+ 				   the empty string if it is at the end of
+ 				   a word. */
+ 
+   LIMWORD,			/* LIMWORD is a terminal symbol that matches
+ 				   the empty string if it is at the beginning
+ 				   or the end of a word. */
+ 
+   NOTLIMWORD,			/* NOTLIMWORD is a terminal symbol that
+ 				   matches the empty string if it is not at
+ 				   the beginning or end of a word. */
+ 
+   QMARK,			/* QMARK is an operator of one argument that
+ 				   matches zero or one occurrences of its
+ 				   argument. */
+ 
+   STAR,				/* STAR is an operator of one argument that
+ 				   matches the Kleene closure (zero or more
+ 				   occurrences) of its argument. */
+ 
+   PLUS,				/* PLUS is an operator of one argument that
+ 				   matches the positive closure (one or more
+ 				   occurrences) of its argument. */
+ 
+   REPMN,			/* REPMN is a lexical token corresponding
+ 				   to the {m,n} construct.  REPMN never
+ 				   appears in the compiled token vector. */
+ 
+   CAT,				/* CAT is an operator of two arguments that
+ 				   matches the concatenation of its
+ 				   arguments.  CAT is never returned by the
+ 				   lexical analyzer. */
+ 
+   OR,				/* OR is an operator of two arguments that
+ 				   matches either of its arguments. */
+ 
+   ORTOP,			/* OR at the toplevel in the parse tree.
+ 				   This is used for a boyer-moore heuristic. */
+ 
+   LPAREN,			/* LPAREN never appears in the parse tree,
+ 				   it is only a lexeme. */
+ 
+   RPAREN,			/* RPAREN never appears in the parse tree. */
+ 
+   CSET				/* CSET (and any value greater) is a
+ 				   terminal symbol that matches any of a
+ 				   class of characters. */
+ } token;
+ 
+ /* Sets are stored in an array in the compiled dfa; the index of the
+    array corresponding to a given set token is given by SET_INDEX(t). */
+ #define SET_INDEX(t) ((t) - CSET)	/* The token CSET itself maps to index 0. */
+ 
+ /* Sometimes characters can only be matched depending on the surrounding
+    context.  Such context decisions depend on what the previous character
+    was, and the value of the current (lookahead) character.  Context
+    dependent constraints are encoded as 8 bit integers.  Each bit that
+    is set indicates that the constraint succeeds in the corresponding
+    context.
+ 
+    bit 7 - previous and current are newlines
+    bit 6 - previous was newline, current isn't
+    bit 5 - previous wasn't newline, current is
+    bit 4 - neither previous nor current is a newline
+    bit 3 - previous and current are word-constituents
+    bit 2 - previous was word-constituent, current isn't
+    bit 1 - previous wasn't word-constituent, current is
+    bit 0 - neither previous nor current is word-constituent
+ 
+    Word-constituent characters are those that satisfy isalnum().
+ 
+    The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
+    succeeds in a particular context.  Prevn is true if the previous character
+    was a newline, currn is true if the lookahead character is a newline.
+    Prevl and currl similarly depend upon whether the previous and current
+    characters are word-constituent letters. */
+ #define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+   ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))	/* Tests one of bits 4-7. */
+ #define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+   ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))	/* Tests one of bits 0-3. */
+ #define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+   (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn)		     \
+    && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))	/* Both halves must allow it. */
+ 
+ /* The following macros give information about what a constraint depends on. */
+ #define PREV_NEWLINE_DEPENDENT(constraint) \
+   (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))	/* Bits 7-6 differ from bits 5-4. */
+ #define PREV_LETTER_DEPENDENT(constraint) \
+   (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))	/* Bits 3-2 differ from bits 1-0. */
+ 
+ /* Tokens that match the empty string subject to some constraint actually
+    work by applying that constraint to determine what may follow them,
+    taking into account what has gone before.  The following values are
+    the constraints corresponding to the special tokens previously defined. */
+ #define NO_CONSTRAINT 0xff	/* Every context bit set. */
+ #define BEGLINE_CONSTRAINT 0xcf	/* Bits 7,6: previous was a newline. */
+ #define ENDLINE_CONSTRAINT 0xaf	/* Bits 7,5: current is a newline. */
+ #define BEGWORD_CONSTRAINT 0xf2	/* Bit 1: previous not a letter, current is. */
+ #define ENDWORD_CONSTRAINT 0xf4	/* Bit 2: previous a letter, current is not. */
+ #define LIMWORD_CONSTRAINT 0xf6	/* Bits 2,1: either word edge. */
+ #define NOTLIMWORD_CONSTRAINT 0xf9	/* Bits 3,0: both or neither are letters. */
+ 
+ /* States of the recognizer correspond to sets of positions in the parse
+    tree, together with the constraints under which they may be matched.
+    So a position is encoded as an index into the parse tree together with
+    a constraint. */
+ typedef struct
+ {
+   unsigned index;		/* Index into the parse array. */
+   unsigned constraint;		/* Constraint for matching this position,
+				   encoded like the *_CONSTRAINT values. */
+ } position;
+ 
+ /* Sets of positions are stored as arrays. */
+ typedef struct
+ {
+   position *elems;		/* Elements of this position set; heap storage released by dfafree(). */
+   int nelem;			/* Number of elements in this set. */
+ } position_set;
+ 
+ /* A state of the dfa consists of a set of positions, some flags,
+    and the token value of the lowest-numbered position of the state that
+    contains an END token. */
+ typedef struct
+ {
+   int hash;			/* Hash of the positions of this state. */
+   position_set elems;		/* Positions this state could match. */
+   char newline;			/* True if previous state matched newline. */
+   char letter;			/* True if previous state matched a letter. */
+   char backref;			/* True if this state matches a \<digit>. */
+   unsigned char constraint;	/* Constraint for this state to accept. */
+   int first_end;		/* Token value of the first END in elems; negated by FIRST_MATCHING_REGEXP. */
+ } dfa_state;
+ 
+ /* Element of a list of strings, at least one of which is known to
+    appear in any text matching the regexp the DFA was compiled from. */
+ struct dfamust
+ {
+   int exact;			/* Set by dfamust() when must equals the root's "is". */
+   char *must;			/* Heap copy of the string; freed by dfafree(). */
+   struct dfamust *next;		/* Next element, or NULL at the end of the list. */
+ };
+ 
+ /* A compiled regular expression. */
+ struct dfa
+ {
+   /* Stuff built by the scanner. */
+   charclass *charclasses;	/* Array of character sets for CSET tokens. */
+   int cindex;			/* Index for adding new charclasses. */
+   int calloc;			/* Number of charclasses currently allocated. */
+ 
+   /* Stuff built by the parser. */
+   token *tokens;		/* Postfix parse array. */
+   int tindex;			/* Index for adding new tokens. */
+   int talloc;			/* Number of tokens currently allocated. */
+   int depth;			/* Depth required of an evaluation stack
+ 				   used for depth-first traversal of the
+ 				   parse tree. */
+   int nleaves;			/* Number of leaves on the parse tree. */
+   int nregexps;			/* Count of parallel regexps being built
+ 				   with dfaparse(). */
+ 
+   /* Stuff owned by the state builder. */
+   dfa_state *states;		/* States of the dfa. */
+   int sindex;			/* Index for adding new states. */
+   int salloc;			/* Number of states currently allocated. */
+ 
+   /* Stuff built by the structure analyzer. */
+   position_set *follows;	/* Array of follow sets, indexed by position
+ 				   index.  The follow of a position is the set
+ 				   of positions containing characters that
+ 				   could conceivably follow a character
+ 				   matching the given position in a string
+ 				   matching the regexp.  Allocated to the
+ 				   maximum possible position index. */
+   int searchflag;		/* True if we are supposed to build a searching
+ 				   as opposed to an exact matcher.  A searching
+ 				   matcher finds the first and shortest string
+ 				   matching a regexp anywhere in the buffer,
+ 				   whereas an exact matcher finds the longest
+ 				   string matching, but anchored to the
+ 				   beginning of the buffer. */
+ 
+   /* Stuff owned by the executor. */
+   int tralloc;			/* Number of transition tables that have
+ 				   slots so far; 0 until dfaexec() runs. */
+   int trcount;			/* Number of transition tables that have
+ 				   actually been built. */
+   int **trans;			/* Transition tables for states that can
+ 				   never accept.  If the transitions for a
+ 				   state have not yet been computed, or the
+ 				   state could possibly accept, its entry in
+ 				   this table is NULL. */
+   int **realtrans;		/* Trans always points to realtrans + 1; this
+ 				   is so trans[-1] can contain NULL. */
+   int **fails;			/* Transition tables after failing to accept
+ 				   on a state that potentially could do so. */
+   int *success;			/* Table of acceptance conditions used in
+ 				   dfaexec and computed in build_state. */
+   int *newlines;		/* Transitions on newlines.  The entry for a
+ 				   newline in any transition table is always
+ 				   -1 so we can count lines without wasting
+ 				   too many cycles.  The transition for a
+ 				   newline is stored separately and handled
+ 				   as a special case.  Newline is also used
+ 				   as a sentinel at the end of the buffer. */
+   struct dfamust *musts;	/* List of strings, at least one of which
+ 				   is known to appear in any text that
+ 				   matches the regexp; built by dfamust(). */
+ };
+ 
+ /* Some macros for user access to dfa internals. */
+ 
+ /* ACCEPTING returns true if s could possibly be an accepting state of r. */
+ #define ACCEPTING(s, r) ((r).states[s].constraint)	/* r is a struct dfa, not a pointer. */
+ 
+ /* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
+    specified context. */
+ #define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
+   SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint,		   \
+ 		       prevn, currn, prevl, currl)
+ 
+ /* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
+    regexps that a given state could accept.  Parallel regexps are numbered
+    starting at 1 (first_end is a token value of END, i.e. -1, or less). */
+ #define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
+ 
+ /* Entry points. */
+ 
+ #ifdef __STDC__
+ 
+ /* dfasyntax() takes two arguments; the first sets the syntax bits described
+    earlier in this file, and the second sets the case-folding flag. */
+ extern void dfasyntax(reg_syntax_t, int);
+ 
+ /* Compile the given string of the given length into the given struct dfa.
+    Final argument is a flag specifying whether to build a searching or an
+    exact matcher. */
+ extern void dfacomp(char *, size_t, struct dfa *, int);
+ 
+ /* Execute the given struct dfa on the buffer of characters.  The
+    first char * points to the beginning, and the second points to the
+    first character after the end of the buffer, which must be a writable
+    place so a sentinel end-of-buffer marker can be stored there.  The
+    second-to-last argument is a flag telling whether to allow newlines to
+    be part of a string matching the regexp.  The next-to-last argument,
+    if non-NULL, points to a place to increment every time we see a
+    newline.  The final argument, if non-NULL, points to a flag that will
+    be set if further examination by a backtracking matcher is needed in
+    order to verify backreferencing; otherwise the flag will be cleared.
+    Returns NULL if no match is found, or a pointer to the first
+    character after the first & shortest matching string in the buffer. */
+ extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
+ 
+ /* Free the storage held by the components of a struct dfa. */
+ extern void dfafree(struct dfa *);
+ 
+ /* Entry points for people who know what they're doing. */
+ 
+ /* Initialize the components of a struct dfa. */
+ extern void dfainit(struct dfa *);
+ 
+ /* Incrementally parse a string of given length into a struct dfa. */
+ extern void dfaparse(char *, size_t, struct dfa *);
+ 
+ /* Analyze a parsed regexp; second argument tells whether to build a searching
+    or an exact matcher. */
+ extern void dfaanalyze(struct dfa *, int);
+ 
+ /* Compute, for each possible character, the transitions out of a given
+    state, storing them in an array of integers. */
+ extern void dfastate(int, struct dfa *, int []);
+ 
+ /* Error handling. */
+ 
+ /* dfaerror() is called by the regexp routines whenever an error occurs.  It
+    takes a single argument, a NUL-terminated string describing the error.
+    The default dfaerror() prints the error message to stderr and exits.
+    The user can provide a different dfaerror() if so desired. */
+ extern void dfaerror(const char *);
+ 
+ #else /* ! __STDC__ */
+ extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
+ extern void dfaanalyze(), dfastate(), dfaerror();
+ extern char *dfaexec();
+ #endif /* ! __STDC__ */
Binary files php-2.0.1/src/jp.regex/dfa.o and php-2.0.1.jp_urat-5.3/src/jp.regex/dfa.o differ
Binary files php-2.0.1/src/jp.regex/libregex.a and php-2.0.1.jp_urat-5.3/src/jp.regex/libregex.a differ
diff -crP php-2.0.1/src/jp.regex/mbc.c php-2.0.1.jp_urat-5.3/src/jp.regex/mbc.c
*** php-2.0.1/src/jp.regex/mbc.c	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/mbc.c	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,98 ----
+ /* Functions for multi-byte support.
+    Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
+    Last change: Jul. 9, 1993 by t^2  */
+ #include "mbc.h"
+ 
+ static const unsigned char mbctab_ascii[] = {
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ 
+ static const unsigned char mbctab_euc[] = {
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ };
+ 
+ static const unsigned char mbctab_sjis[] = {
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ };
+ 
+ #ifdef EUC
+ const unsigned char *mbctab = mbctab_euc;
+ int current_mbctype = MBCTYPE_EUC;
+ #else
+ #ifdef SJIS
+ const unsigned char *mbctab = mbctab_sjis;
+ int current_mbctype = MBCTYPE_SJIS;
+ #else
+ const unsigned char *mbctab = mbctab_ascii;
+ int current_mbctype = MBCTYPE_ASCII;
+ #endif
+ #endif
+ 
+ void
+ #ifdef __STDC__
+ mbcinit(int mbctype)
+ #else
+ mbcinit(mbctype)
+      int mbctype;
+ #endif
+ {
+   switch (mbctype) {
+   case MBCTYPE_ASCII:
+     mbctab = mbctab_ascii;
+     current_mbctype = MBCTYPE_ASCII;
+     break;
+   case MBCTYPE_EUC:
+     mbctab = mbctab_euc;
+     current_mbctype = MBCTYPE_EUC;
+     break;
+   case MBCTYPE_SJIS:
+     mbctab = mbctab_sjis;
+     current_mbctype = MBCTYPE_SJIS;
+     break;
+   }
+ }
diff -crP php-2.0.1/src/jp.regex/mbc.h php-2.0.1.jp_urat-5.3/src/jp.regex/mbc.h
*** php-2.0.1/src/jp.regex/mbc.h	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/mbc.h	Wed Feb 18 21:03:40 1998
***************
*** 0 ****
--- 1,38 ----
+ #ifndef MBC_H
+ #define MBC_H 1
+ /* Definitions for multi-byte support.
+    Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
+    Last change: Aug. 29, 1994 by t^2  */
+ 
+ #ifndef const
+ #ifndef __STDC__
+ #ifdef __GNUC__
+ #define const __const__
+ #define volatile __volatile__
+ #else
+ #define const
+ #define volatile
+ #endif
+ #endif
+ #endif
+ 
+ #ifndef _
+ #ifdef __STDC__
+ #define _(x) x
+ #else
+ #define _(x) ()
+ #endif
+ #endif
+ 
+ #define MBCTYPE_ASCII 0
+ #define MBCTYPE_EUC 1
+ #define MBCTYPE_SJIS 2
+ 
+ extern const unsigned char *mbctab;
+ extern int current_mbctype;
+ 
+ void mbcinit _((int));
+ 
+ #define ismbchar(c) mbctab[(unsigned char)(c)]
+ 
+ #endif /* !MBC_H */
Binary files php-2.0.1/src/jp.regex/mbc.o and php-2.0.1.jp_urat-5.3/src/jp.regex/mbc.o differ
diff -crP php-2.0.1/src/jp.regex/regex.c php-2.0.1.jp_urat-5.3/src/jp.regex/regex.c
*** php-2.0.1/src/jp.regex/regex.c	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/regex.c	Wed Feb 18 21:03:41 1998
***************
*** 0 ****
--- 1,5475 ----
+ /* Extended regular expression matching and search library,
+    version 0.12.
+    (Implements POSIX draft P10003.2/D11.2, except for
+    internationalization features.)
+ 
+    Copyright (C) 1993 Free Software Foundation, Inc.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+ /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
+    Last change: Aug. 29, 1994 by t^2  */
+ 
+ /* AIX requires this to be the first thing in the file. */
+ #if defined (_AIX) && !defined (REGEX_MALLOC)
+   #pragma alloca
+ #endif
+ 
+ #define _GNU_SOURCE
+ 
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif
+ 
+ #if defined(STDC_HEADERS) && !defined(emacs)
+ #include <stddef.h>
+ #else
+ /* We need this for `regex.h', and perhaps for the Emacs include files.  */
+ #include <sys/types.h>
+ #endif
+ 
+ /* The `emacs' switch turns on certain matching commands
+    that make sense only in Emacs. */
+ #ifdef emacs
+ 
+ #include "lisp.h"
+ #include "buffer.h"
+ #include "syntax.h"
+ 
+ /* Emacs uses `NULL' as a predicate.  */
+ #undef NULL
+ 
+ #else  /* not emacs */
+ 
+ /* We used to test for `BSTRING' here, but only GCC and Emacs define
+    `BSTRING', as far as I know, and neither of them use this code.  */
+ #if HAVE_STRING_H || STDC_HEADERS
+ #include <string.h>
+ #ifndef bcmp
+ #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
+ #endif
+ #ifndef bcopy
+ #define bcopy(s, d, n)	memmove ((d), (s), (n))
+ #endif
+ #ifndef bzero
+ #define bzero(s, n)	memset ((s), 0, (n))
+ #endif
+ #else
+ #include <strings.h>
+ #endif
+ 
+ #ifdef STDC_HEADERS
+ #include <stdlib.h>
+ #else
+ char *malloc ();
+ char *realloc ();
+ #endif
+ 
+ 
+ /* Define the syntax stuff for \<, \>, etc.  */
+ 
+ /* This must be nonzero for the wordchar and notwordchar pattern
+    commands in re_match_2.  */
+ #ifndef Sword 
+ #define Sword 1
+ #endif
+ 
+ #ifdef SYNTAX_TABLE
+ 
+ extern char *re_syntax_table;
+ 
+ #else /* not SYNTAX_TABLE */
+ 
+ /* How many characters in the character set.  */
+ #define CHAR_SET_SIZE 256
+ 
+ static char re_syntax_table[CHAR_SET_SIZE];
+ 
+ static void
+ init_syntax_once ()
+ {
+    register int c;
+    static int done = 0;
+ 
+    if (done)
+      return;
+ 
+    bzero (re_syntax_table, sizeof re_syntax_table);
+ 
+    for (c = 'a'; c <= 'z'; c++)
+      re_syntax_table[c] = Sword;
+ 
+    for (c = 'A'; c <= 'Z'; c++)
+      re_syntax_table[c] = Sword;
+ 
+    for (c = '0'; c <= '9'; c++)
+      re_syntax_table[c] = Sword;
+ 
+    re_syntax_table['_'] = Sword;
+ 
+    done = 1;
+ }
+ 
+ #endif /* not SYNTAX_TABLE */
+ 
+ #define SYNTAX(c) re_syntax_table[c]
+ 
+ #endif /* not emacs */
+ 
+ /* Get the interface, including the syntax bits.  */
+ #include "regex.h"
+ #include "mbc.h"
+ 
+ /* isalpha etc. are used for the character classes.  */
+ #include <ctype.h>
+ 
+ /* Jim Meyering writes:
+ 
+    "... Some ctype macros are valid only for character codes that
+    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+    using /bin/cc or gcc but without giving an ansi option).  So, all
+    ctype uses should be through macros like ISPRINT...  If
+    STDC_HEADERS is defined, then autoconf has verified that the ctype
+    macros don't need to be guarded with references to isascii. ...
+    Defining isascii to 1 should let any compiler worth its salt
+    eliminate the && through constant folding."  */
+ #if ! defined (isascii) || defined (STDC_HEADERS)
+ #undef isascii
+ #define isascii(c) 1
+ #endif
+ 
+ #ifdef isblank
+ #define ISBLANK(c) (isascii (c) && isblank (c))
+ #else
+ #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+ #endif
+ #ifdef isgraph
+ #define ISGRAPH(c) (isascii (c) && isgraph (c))
+ #else
+ #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+ #endif
+ 
+ #define ISPRINT(c) (isascii (c) && isprint (c))
+ #define ISDIGIT(c) (isascii (c) && isdigit (c))
+ #define ISALNUM(c) (isascii (c) && isalnum (c))
+ #define ISALPHA(c) (isascii (c) && isalpha (c))
+ #define ISCNTRL(c) (isascii (c) && iscntrl (c))
+ #define ISLOWER(c) (isascii (c) && islower (c))
+ #define ISPUNCT(c) (isascii (c) && ispunct (c))
+ #define ISSPACE(c) (isascii (c) && isspace (c))
+ #define ISUPPER(c) (isascii (c) && isupper (c))
+ #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+ 
+ #ifndef NULL
+ #define NULL 0
+ #endif
+ 
+ /* We remove any previous definition of `SIGN_EXTEND_CHAR',
+    since ours (we hope) works properly with all combinations of
+    machines, compilers, `char' and `unsigned char' argument types.
+    (Per Bothner suggested the basic approach.)  */
+ #undef SIGN_EXTEND_CHAR
+ #if __STDC__
+ #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+ #else  /* not __STDC__ */
+ /* As in Harbison and Steele.  */
+ #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+ #endif
+ 
+ /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
+    use `alloca' instead of `malloc'.  This is because using malloc in
+    re_search* or re_match* could cause memory leaks when C-g is used in
+    Emacs; also, malloc is slower and causes storage fragmentation.  On
+    the other hand, malloc is more portable, and easier to debug.  
+    
+    Because we sometimes use alloca, some routines have to be macros,
+    not functions -- `alloca'-allocated space disappears at the end of the
+    function it is called in.  */
+ 
+ #ifdef REGEX_MALLOC
+ 
+ #define REGEX_ALLOCATE malloc
+ #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+ 
+ #else /* not REGEX_MALLOC  */
+ 
+ /* Emacs already defines alloca, sometimes.  */
+ #ifndef alloca
+ 
+ /* Make alloca work the best possible way.  */
+ #ifdef __GNUC__
+ #define alloca __builtin_alloca
+ #else /* not __GNUC__ */
+ #if HAVE_ALLOCA_H
+ #include <alloca.h>
+ #else /* not __GNUC__ or HAVE_ALLOCA_H */
+ #ifndef _AIX /* Already did AIX, up at the top.  */
+ char *alloca ();
+ #endif /* not _AIX */
+ #endif /* not HAVE_ALLOCA_H */ 
+ #endif /* not __GNUC__ */
+ 
+ #endif /* not alloca */
+ 
+ #define REGEX_ALLOCATE alloca
+ 
+ /* Assumes a `char *destination' variable.  */
+ #define REGEX_REALLOCATE(source, osize, nsize)				\
+   (destination = (char *) alloca (nsize),				\
+    bcopy (source, destination, osize),					\
+    destination)
+ 
+ #endif /* not REGEX_MALLOC */
+ 
+ 
+ /* True if `size1' is nonzero and PTR is pointing anywhere inside
+    `string1' or just past its end.  This works if PTR is NULL, which is
+    a good thing.  */
+ #define FIRST_STRING_P(ptr) 					\
+   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+ 
+ /* (Re)Allocate N items of type T using malloc, or fail.  */
+ #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+ #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+ #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+ 
+ #define BYTEWIDTH 8 /* In bits.  */
+ 
+ #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+ 
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
+ 
+ typedef char boolean;
+ #define false 0
+ #define true 1
+ 
+ /* These are the command codes that appear in compiled regular
+    expressions.  Some opcodes are followed by argument bytes.  A
+    command code can specify any interpretation whatsoever for its
+    arguments.  Zero bytes may appear in the compiled regular expression.
+ 
+    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+    `exactn' we use here must also be 1.  */
+ 
+ typedef enum
+ {
+   no_op = 0,
+ 
+         /* Followed by one byte giving n, then by n literal bytes.  */
+   exactn = 1,
+ 
+         /* Matches any (more or less) character.  */
+   anychar,
+ 
+         /* Matches any one char belonging to specified set.  First
+            following byte is number of bitmap bytes.  Then come bytes
+            for a bitmap saying which chars are in.  Bits in each byte
+            are ordered low-bit-first.  A character is in the set if its
+            bit is 1.  A character too large to have a bit in the map is
+            automatically not in the set.  */
+   charset,
+ 
+         /* Same parameters as charset, but match any character that is
+            not one of those specified.  */
+   charset_not,
+ 
+         /* Start remembering the text that is matched, for storing in a
+            register.  Followed by one byte with the register number, in
+            the range 0 to one less than the pattern buffer's re_nsub
+            field.  Then followed by one byte with the number of groups
+            inner to this one.  (This last has to be part of the
+            start_memory only because we need it in the on_failure_jump
+            of re_match_2.)  */
+   start_memory,
+ 
+         /* Stop remembering the text that is matched and store it in a
+            memory register.  Followed by one byte with the register
+            number, in the range 0 to one less than `re_nsub' in the
+            pattern buffer, and one byte with the number of inner groups,
+            just like `start_memory'.  (We need the number of inner
+            groups here because we don't have any easy way of finding the
+            corresponding start_memory when we're at a stop_memory.)  */
+   stop_memory,
+ 
+         /* Match a duplicate of something remembered. Followed by one
+            byte containing the register number.  */
+   duplicate,
+ 
+         /* Fail unless at beginning of line.  */
+   begline,
+ 
+         /* Fail unless at end of line.  */
+   endline,
+ 
+         /* Succeeds if at beginning of buffer (if emacs) or at beginning
+            of string to be matched (if not).  */
+   begbuf,
+ 
+         /* Analogously, for end of buffer/string.  */
+   endbuf,
+  
+         /* Followed by two byte relative address to which to jump.  */
+   jump, 
+ 
+ 	/* Same as jump, but marks the end of an alternative.  */
+   jump_past_alt,
+ 
+         /* Followed by two-byte relative address of place to resume at
+            in case of failure.  */
+   on_failure_jump,
+ 	
+         /* Like on_failure_jump, but pushes a placeholder instead of the
+            current string position when executed.  */
+   on_failure_keep_string_jump,
+   
+         /* Throw away latest failure point and then jump to following
+            two-byte relative address.  */
+   pop_failure_jump,
+ 
+         /* Change to pop_failure_jump if we know we won't have to backtrack to
+            match; otherwise change to jump.  This is used to jump
+            back to the beginning of a repeat.  If what follows this jump
+            clearly won't match what the repeat does, such that we can be
+            sure that there is no use backtracking out of repetitions
+            already matched, then we change it to a pop_failure_jump.
+            Followed by two-byte address.  */
+   maybe_pop_jump,
+ 
+         /* Jump to following two-byte address, and push a dummy failure
+            point. This failure point will be thrown away if an attempt
+            is made to use it for a failure.  A `+' construct makes this
+            before the first repeat.  Also used as an intermediary kind
+            of jump when compiling an alternative.  */
+   dummy_failure_jump,
+ 
+ 	/* Push a dummy failure point and continue.  Used at the end of
+ 	   alternatives.  */
+   push_dummy_failure,
+ 
+         /* Followed by two-byte relative address and two-byte number n.
+            After matching N times, jump to the address upon failure.  */
+   succeed_n,
+ 
+         /* Followed by two-byte relative address, and two-byte number n.
+            Jump to the address N times, then fail.  */
+   jump_n,
+ 
+         /* Set the following two-byte relative address to the
+            subsequent two-byte number.  The address *includes* the two
+            bytes of number.  */
+   set_number_at,
+ 
+   wordchar,	/* Matches any word-constituent character.  */
+   notwordchar,	/* Matches any char that is not a word-constituent.  */
+ 
+   wordbeg,	/* Succeeds if at word beginning.  */
+   wordend,	/* Succeeds if at word end.  */
+ 
+   wordbound,	/* Succeeds if at a word boundary.  */
+   notwordbound	/* Succeeds if not at a word boundary.  */
+ 
+ #ifdef emacs
+   ,before_dot,	/* Succeeds if before point.  */
+   at_dot,	/* Succeeds if at point.  */
+   after_dot,	/* Succeeds if after point.  */
+ 
+ 	/* Matches any character whose syntax is specified.  Followed by
+            a byte which contains a syntax code, e.g., Sword.  */
+   syntaxspec,
+ 
+ 	/* Matches any character whose syntax is not that specified.  */
+   notsyntaxspec
+ #endif /* emacs */
+ } re_opcode_t;
+ 
+ /* Common operations on the compiled pattern.  */
+ 
+ /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
+ 
+ #define STORE_NUMBER(destination, number)				\
+   do {									\
+     (destination)[0] = (number) & 0377;					\
+     (destination)[1] = (number) >> 8;					\
+   } while (0)
+ 
+ /* Same as STORE_NUMBER, except increment DESTINATION to
+    the byte after where the number is stored.  Therefore, DESTINATION
+    must be an lvalue.  */
+ 
+ #define STORE_NUMBER_AND_INCR(destination, number)			\
+   do {									\
+     STORE_NUMBER (destination, number);					\
+     (destination) += 2;							\
+   } while (0)
+ 
+ /* Put into DESTINATION a number stored in two contiguous bytes starting
+    at SOURCE.  */
+ 
+ #define EXTRACT_NUMBER(destination, source)				\
+   do {									\
+     (destination) = *(source) & 0377;					\
+     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
+   } while (0)
+ 
+ #ifdef DEBUG
+ static void extract_number _RE_ARGS((int *dest, unsigned char *source));
+ static void
+ extract_number (dest, source)
+     int *dest;
+     unsigned char *source;
+ {
+   int temp = SIGN_EXTEND_CHAR (*(source + 1)); 
+   *dest = *source & 0377;
+   *dest += temp << 8;
+ }
+ 
+ #ifndef EXTRACT_MACROS /* To debug the macros.  */
+ #undef EXTRACT_NUMBER
+ #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+ #endif /* not EXTRACT_MACROS */
+ 
+ #endif /* DEBUG */
+ 
+ /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+    SOURCE must be an lvalue.  */
+ 
+ #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+   do {									\
+     EXTRACT_NUMBER (destination, source);				\
+     (source) += 2; 							\
+   } while (0)
+ 
+ #ifdef DEBUG
+ static void extract_number_and_incr _RE_ARGS((int *destination,
+ 				       unsigned char **source));
+ static void
+ extract_number_and_incr (destination, source)
+     int *destination;
+     unsigned char **source;
+ { 
+   extract_number (destination, *source);
+   *source += 2;
+ }
+ 
+ #ifndef EXTRACT_MACROS
+ #undef EXTRACT_NUMBER_AND_INCR
+ #define EXTRACT_NUMBER_AND_INCR(dest, src) \
+   extract_number_and_incr (&dest, &src)
+ #endif /* not EXTRACT_MACROS */
+ 
+ #endif /* DEBUG */
+ 
+ #define STORE_MBC(p, c) \
+   ((p)[0] = (unsigned char) ((c) >> 8), (p)[1] = (unsigned char) (c))
+ #define STORE_MBC_AND_INCR(p, c) \
+   (*(p)++ = (unsigned char) ((c) >> 8), *(p)++ = (unsigned char) (c))
+ 
+ #define EXTRACT_MBC(p) \
+   ((unsigned char) (p)[0] << 8 | (unsigned char) (p)[1])
+ #define EXTRACT_MBC_AND_INCR(p) \
+   ((p) += 2, (unsigned char) (p)[-2] << 8 | (unsigned char) (p)[-1])
+ 
+ #define EXTRACT_UNSIGNED(p) \
+   ((unsigned char) (p)[0] | (unsigned char) (p)[1] << 8)
+ #define EXTRACT_UNSIGNED_AND_INCR(p) \
+   ((p) += 2, (unsigned char) (p)[-2] | (unsigned char) (p)[-1] << 8)
+ 
+ /* If DEBUG is defined, Regex prints many voluminous messages about what
+    it is doing (if the variable `debug' is nonzero).  If linked with the
+    main program in `iregex.c', you can enter patterns and strings
+    interactively.  And if linked with the main program in `main.c' and
+    the other test files, you can run the already-written tests.  */
+ 
+ #ifdef DEBUG
+ 
+ /* We use standard I/O for debugging.  */
+ #include <stdio.h>
+ 
+ /* It is useful to test things that ``must'' be true when debugging.  */
+ #include <assert.h>
+ 
+ static int debug = 0;
+ 
+ #define DEBUG_STATEMENT(e) e
+ #define DEBUG_PRINT1(x) if (debug) printf (x)
+ #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+ #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+ #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+ #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
+   if (debug) print_partial_compiled_pattern (s, e)
+ #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
+   if (debug) print_double_string (w, s1, sz1, s2, sz2)
+ 
+ 
+ extern void printchar ();
+ 
+ /* Print the fastmap in human-readable form.  */
+ 
+ void
+ print_fastmap (fastmap)
+     char *fastmap;
+ {
+   unsigned was_a_range = 0;
+   unsigned i = 0;  
+   
+   while (i < (1 << BYTEWIDTH))
+     {
+       if (fastmap[i++])
+ 	{
+ 	  was_a_range = 0;
+           printchar (i - 1);
+           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
+             {
+               was_a_range = 1;
+               i++;
+             }
+ 	  if (was_a_range)
+             {
+               printf ("-");
+               printchar (i - 1);
+             }
+         }
+     }
+   putchar ('\n'); 
+ }
+ 
+ 
+ /* Print a compiled pattern string in human-readable form, starting at
+    the START pointer into it and ending just before the pointer END.
+    A NULL START prints "(null)"; offsets shown are relative to START.  */
+ void
+ print_partial_compiled_pattern (start, end)
+     unsigned char *start;
+     unsigned char *end;
+ {
+   int mcnt, mcnt2;
+   unsigned char *p = start;
+   unsigned char *pend = end;
+ 
+   if (start == NULL)
+     {
+       printf ("(null)\n");
+       return;
+     }
+   /* Loop over pattern commands.  Offsets are pointer differences, so
+      they are cast to int to match the `%d' conversions.  */
+   while (p < pend)
+     {
+       printf ("%d:\t", (int) (p - start));
+ 
+       switch ((re_opcode_t) *p++)
+ 	{
+         case no_op:
+           printf ("/no_op");
+           break;
+ 
+ 	case exactn:
+ 	  mcnt = *p++;
+           printf ("/exactn/%d", mcnt);
+           do
+ 	    {
+               putchar ('/');
+ 	      if (ismbchar (*p) && 2 <= mcnt) {
+ 		printf ("/%.2s", (char *) p), p += 2, --mcnt;
+ 		continue;	/* re-tests --mcnt: two bytes, two counts */
+ 	      }
+ 	      printchar (*p++);
+             }
+           while (--mcnt);
+           break;
+ 
+ 	case start_memory:
+           mcnt = *p++;
+           printf ("/start_memory/%d/%d", mcnt, *p++);
+           break;
+ 
+ 	case stop_memory:
+           mcnt = *p++;
+ 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
+           break;
+ 
+ 	case duplicate:
+ 	  printf ("/duplicate/%d", *p++);
+ 	  break;
+ 
+ 	case anychar:
+ 	  printf ("/anychar");
+ 	  break;
+ 
+ 	case charset:
+         case charset_not:
+           {
+             register int c, last = -100;
+ 	    register int in_range = 0;
+ 
+ 	    printf ("/charset [%s",
+ 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+ 
+             assert (p + *p < pend);
+ 
+             for (c = 0; c < 256; c++)
+ 	      if (c / 8 < *p
+ 		  && (p[1 + (c/8)] & (1 << (c % 8))))
+ 		{
+ 		  /* Are we starting a range?  */
+ 		  if (last + 1 == c && ! in_range)
+ 		    {
+ 		      putchar ('-');
+ 		      in_range = 1;
+ 		    }
+ 		  /* Have we broken a range?  */
+ 		  else if (last + 1 != c && in_range)
+ 		    {
+ 		      printchar (last);
+ 		      in_range = 0;
+ 		    }
+ 
+ 		  if (! in_range)
+ 		    printchar (c);
+ 
+ 		  last = c;
+ 		}
+ 
+ 	    if (in_range)
+ 	      printchar (last);
+ 
+ 	    p += 1 + *p;	/* skip the length byte and the bitmap */
+ 	    {
+ 	      unsigned short i, size;
+ 
+ 	      size = EXTRACT_UNSIGNED_AND_INCR (p);
+ 	      for (i = 0; i < size; i++)
+ 		printf ("%.2s-%.2s", (char *) p, (char *) p + 2),
+ 		p += 4;
+ 	    }
+ 	    putchar (']');
+ 	  }
+ 	  break;
+ 
+ 	case begline:
+ 	  printf ("/begline");
+           break;
+ 
+ 	case endline:
+           printf ("/endline");
+           break;
+ 
+ 	case on_failure_jump:
+           extract_number_and_incr (&mcnt, &p);
+   	  printf ("/on_failure_jump to %d", (int) (p + mcnt - start));
+           break;
+ 
+ 	case on_failure_keep_string_jump:
+           extract_number_and_incr (&mcnt, &p);
+   	  printf ("/on_failure_keep_string_jump to %d", (int) (p + mcnt - start));
+           break;
+ 
+ 	case dummy_failure_jump:
+           extract_number_and_incr (&mcnt, &p);
+   	  printf ("/dummy_failure_jump to %d", (int) (p + mcnt - start));
+           break;
+ 
+ 	case push_dummy_failure:
+           printf ("/push_dummy_failure");
+           break;
+ 
+         case maybe_pop_jump:
+           extract_number_and_incr (&mcnt, &p);
+   	  printf ("/maybe_pop_jump to %d", (int) (p + mcnt - start));
+ 	  break;
+ 
+         case pop_failure_jump:
+ 	  extract_number_and_incr (&mcnt, &p);
+   	  printf ("/pop_failure_jump to %d", (int) (p + mcnt - start));
+ 	  break;          
+ 
+         case jump_past_alt:
+ 	  extract_number_and_incr (&mcnt, &p);
+   	  printf ("/jump_past_alt to %d", (int) (p + mcnt - start));
+ 	  break;          
+ 
+         case jump:
+ 	  extract_number_and_incr (&mcnt, &p);
+   	  printf ("/jump to %d", (int) (p + mcnt - start));
+ 	  break;
+ 
+         case succeed_n: 
+           extract_number_and_incr (&mcnt, &p);
+           extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/succeed_n to %d, %d times", (int) (p + mcnt - start), mcnt2);
+           break;
+ 
+         case jump_n: 
+           extract_number_and_incr (&mcnt, &p);
+           extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/jump_n to %d, %d times", (int) (p + mcnt - start), mcnt2);
+           break;
+ 
+         case set_number_at: 
+           extract_number_and_incr (&mcnt, &p);
+           extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/set_number_at location %d to %d", (int) (p + mcnt - start), mcnt2);
+           break;
+ 
+         case wordbound:
+ 	  printf ("/wordbound");
+ 	  break;
+ 
+ 	case notwordbound:
+ 	  printf ("/notwordbound");
+           break;
+ 
+ 	case wordbeg:
+ 	  printf ("/wordbeg");
+ 	  break;
+ 
+ 	case wordend:
+ 	  printf ("/wordend");
+ 	  break;
+ #ifdef emacs
+ 	case before_dot:
+ 	  printf ("/before_dot");
+           break;
+ 
+ 	case at_dot:
+ 	  printf ("/at_dot");
+           break;
+ 
+ 	case after_dot:
+ 	  printf ("/after_dot");
+           break;
+ 
+ 	case syntaxspec:
+           printf ("/syntaxspec");
+ 	  mcnt = *p++;
+ 	  printf ("/%d", mcnt);
+           break;
+ 
+ 	case notsyntaxspec:
+           printf ("/notsyntaxspec");
+ 	  mcnt = *p++;
+ 	  printf ("/%d", mcnt);
+ 	  break;
+ #endif /* emacs */
+ 
+ 	case wordchar:
+ 	  printf ("/wordchar");
+           break;
+ 
+ 	case notwordchar:
+ 	  printf ("/notwordchar");
+           break;
+ 
+ 	case begbuf:
+ 	  printf ("/begbuf");
+           break;
+ 
+ 	case endbuf:
+ 	  printf ("/endbuf");
+           break;
+ 
+         default:
+           printf ("?%d", *(p-1));
+ 	}
+ 
+       putchar ('\n');
+     }
+ 
+   printf ("%d:\tend of pattern.\n", (int) (p - start));
+ }
+ 
+ /* Print BUFP's compiled pattern, then its size counters and flag fields.  */
+ void
+ print_compiled_pattern (bufp)
+     struct re_pattern_buffer *bufp;
+ {
+   unsigned char *buffer = bufp->buffer;
+ 
+   print_partial_compiled_pattern (buffer, buffer + bufp->used);
+   printf ("%lu bytes used/%lu bytes allocated.\n",	/* casts match %lu */
+ 	  (unsigned long) bufp->used, (unsigned long) bufp->allocated);
+   if (bufp->fastmap_accurate && bufp->fastmap)
+     {
+       printf ("fastmap: ");
+       print_fastmap (bufp->fastmap);
+     }
+ 
+   printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+   printf ("regs_alloc: %d\t", bufp->regs_allocated);
+   printf ("can_be_null: %d\t", bufp->can_be_null);
+   printf ("newline_anchor: %d\n", bufp->newline_anchor);
+   printf ("no_sub: %d\t", bufp->no_sub);
+   printf ("not_bol: %d\t", bufp->not_bol);
+   printf ("not_eol: %d\t", bufp->not_eol);
+   printf ("syntax: %lu\n", (unsigned long)bufp->syntax);
+   /* Perhaps we should print the translate table?  */
+ }
+ 
+ 
+ /* Debug aid: print the text from WHERE to the end of STRING1/STRING2.  */
+ void
+ print_double_string (where, string1, size1, string2, size2)
+     const char *where;
+     const char *string1;
+     const char *string2;
+     int size1;
+     int size2;
+ {
+   unsigned idx;
+ 
+   if (where == NULL)
+     {
+       printf ("(null)");
+       return;
+     }
+   if (FIRST_STRING_P (where))
+     {
+       for (idx = where - string1; idx < size1; idx++)
+         printchar (string1[idx]);
+       where = string2;		/* Carry on from the start of STRING2.  */
+     }
+ 
+   for (idx = where - string2; idx < size2; idx++)
+     printchar (string2[idx]);
+ }
+ 
+ #else /* not DEBUG */
+ 
+ #undef assert
+ #define assert(e)
+ 
+ #define DEBUG_STATEMENT(e)
+ #define DEBUG_PRINT1(x)
+ #define DEBUG_PRINT2(x1, x2)
+ #define DEBUG_PRINT3(x1, x2, x3)
+ #define DEBUG_PRINT4(x1, x2, x3, x4)
+ #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+ #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+ 
+ #endif /* not DEBUG */
+ 
+ /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+    also be assigned to arbitrarily: each pattern buffer stores its own
+    syntax, so it can be changed between regex compilations.  */
+ reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+ 
+ 
+ /* Specify the precise syntax of regexps for compilation.  This provides
+    for compatibility for various utilities which historically have
+    different, incompatible syntaxes.
+ 
+    The argument SYNTAX is a bit mask comprised of the various bits
+    defined in regex.h.  We return the old syntax.  */
+ 
+ reg_syntax_t
+ re_set_syntax (syntax)
+     reg_syntax_t syntax;
+ {
+   /* Install SYNTAX globally and hand the previous setting back.  */
+   const reg_syntax_t previous = re_syntax_options;
+   re_syntax_options = syntax;
+   return previous;
+ }
+ 
+ /* This table gives an error message for each of the error codes listed
+    in regex.h.  The order here must match the order of the codes there.  */
+ 
+ static const char *re_error_msg[] =
+   { NULL,					/* REG_NOERROR */
+     "No match",					/* REG_NOMATCH */
+     "Invalid regular expression",		/* REG_BADPAT */
+     "Invalid collation character",		/* REG_ECOLLATE */
+     "Invalid character class name",		/* REG_ECTYPE */
+     "Trailing backslash",			/* REG_EESCAPE */
+     "Invalid back reference",			/* REG_ESUBREG */
+     "Unmatched [ or [^",			/* REG_EBRACK */
+     "Unmatched ( or \\(",			/* REG_EPAREN */
+     "Unmatched \\{",				/* REG_EBRACE */
+     "Invalid content of \\{\\}",		/* REG_BADBR */
+     "Invalid range end",			/* REG_ERANGE */
+     "Memory exhausted",				/* REG_ESPACE */
+     "Invalid preceding regular expression",	/* REG_BADRPT */
+     "Premature end of regular expression",	/* REG_EEND */
+     "Regular expression too big",		/* REG_ESIZE */
+     "Unmatched ) or \\)",			/* REG_ERPAREN */
+   };
+ 
+ /* Subroutine declarations and macros for regex_compile.  */
+ 
+ static reg_errcode_t regex_compile _RE_ARGS((const char *pattern, size_t size,
+ 					     reg_syntax_t syntax,
+ 					     struct re_pattern_buffer *bufp));
+ static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg));
+ static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc,
+ 				int arg1, int arg2));
+ static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc,
+ 				 int arg, unsigned char *end));
+ static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc,
+ 				 int arg1, int arg2, unsigned char *end));
+ static boolean at_begline_loc_p _RE_ARGS((const char *pattern, const char *p,
+ 					  reg_syntax_t syntax));
+ static boolean at_endline_loc_p _RE_ARGS((const char *p, const char *pend,
+ 					  reg_syntax_t syntax));
+ #if 0 /* We don't use this. */
+ static reg_errcode_t compile_range _RE_ARGS((const char **p_ptr,
+ 					     const char *pend,
+ 					     char *translate,
+ 					     reg_syntax_t syntax,
+ 					     unsigned char *b));
+ #endif
+ 
+ /* Fetch the next character in the uncompiled pattern---translating it 
+    if necessary.  Also cast from a signed character in the constant
+    string passed to us by the user to an unsigned char that we can use
+    as an array index (in, e.g., `translate').  */
+ #define PATFETCH(c)							\
+   do {if (p == pend) return REG_EEND;					\
+     c = (unsigned char) *p++;						\
+     if (translate && !ismbchar (c))					\
+       c = (unsigned char) translate[(unsigned char) (c)];		\
+   } while (0)
+ 
+ /* Fetch the next character in the uncompiled pattern, with no
+    translation.  */
+ #define PATFETCH_RAW(c)							\
+   do {if (p == pend) return REG_EEND;					\
+     c = (unsigned char) *p++; 						\
+   } while (0)
+ 
+ /* Go backwards one character in the pattern.  */
+ #define PATUNFETCH p--
+ 
+ 
+ /* If `translate' is non-null, return translate[D], else just D.  We
+    cast the subscript to translate because some data is declared as
+    `char *', to avoid warnings when a string constant is passed.  But
+    when we use a character as a subscript we must make it unsigned.  */
+ #define TRANSLATE(d) (translate						\
+ 		      ? (unsigned char) translate[(unsigned char) (d)]	\
+ 		      : (d))
+ 
+ 
+ /* Macros for outputting the compiled pattern into `buffer'.  */
+ 
+ /* If the buffer isn't allocated when it comes in, use this.  */
+ #define INIT_BUF_SIZE  32
+ 
+ /* Make sure we have at least N more bytes of space in buffer.  */
+ #define GET_BUFFER_SPACE(n)						\
+     while (b - bufp->buffer + (n) > bufp->allocated)			\
+       EXTEND_BUFFER ()
+ 
+ /* Make sure we have one more byte of buffer space and then add C to it.  */
+ #define BUF_PUSH(c)							\
+   do {									\
+     GET_BUFFER_SPACE (1);						\
+     *b++ = (unsigned char) (c);						\
+   } while (0)
+ 
+ 
+ /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
+ #define BUF_PUSH_2(c1, c2)						\
+   do {									\
+     GET_BUFFER_SPACE (2);						\
+     *b++ = (unsigned char) (c1);					\
+     *b++ = (unsigned char) (c2);					\
+   } while (0)
+ 
+ 
+ /* As with BUF_PUSH_2, except for three bytes.  */
+ #define BUF_PUSH_3(c1, c2, c3)						\
+   do {									\
+     GET_BUFFER_SPACE (3);						\
+     *b++ = (unsigned char) (c1);					\
+     *b++ = (unsigned char) (c2);					\
+     *b++ = (unsigned char) (c3);					\
+   } while (0)
+ 
+ 
+ /* Store a jump with opcode OP at LOC to location TO.  We store a
+    relative address offset by the three bytes the jump itself occupies.  */
+ #define STORE_JUMP(op, loc, to) \
+   store_op1 (op, loc, (int)((to) - (loc) - 3))
+ 
+ /* Likewise, for a two-argument jump.  */
+ #define STORE_JUMP2(op, loc, to, arg) \
+   store_op2 (op, loc, (int)((to) - (loc) - 3), arg)
+ 
+ /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+ #define INSERT_JUMP(op, loc, to) \
+   insert_op1 (op, loc, (int)((to) - (loc) - 3), b)
+ 
+ /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+ #define INSERT_JUMP2(op, loc, to, arg) \
+   insert_op2 (op, loc, (int)((to) - (loc) - 3), arg, b)
+ 
+ 
+ /* This is not an arbitrary limit: the arguments which represent offsets
+    into the pattern are two bytes long.  So if 2^16 bytes turns out to
+    be too small, many things would have to change.  */
+ /* Any other compiler which, like MSC, has allocation limit below 2^16
+    bytes will have to use approach similar to what was done below for
+    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
+    reallocating to 0 bytes.  Such thing is not going to work too well.
+    You have been warned!!  */
+ #ifdef _MSC_VER
+ /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+    The REALLOC define eliminates a flurry of conversion warnings,
+    but is not required. */
+ #define MAX_BUF_SIZE  65500L
+ #define REALLOC(p,s) realloc((p), (size_t) (s))
+ #else
+ #define MAX_BUF_SIZE (1L << 16)
+ #define REALLOC realloc
+ #endif
+ 
+ /* Double the buffer's size via realloc (capped at MAX_BUF_SIZE) and
+    reset the pointers that pointed into the old block to point to the
+    correct places in the new one.  If the buffer is already MAX_BUF_SIZE
+    bytes, return REG_ESIZE; if realloc fails, return REG_ESPACE.  */
+ #define EXTEND_BUFFER()							\
+   do { 									\
+     unsigned char *old_buffer = bufp->buffer;				\
+     if (bufp->allocated == MAX_BUF_SIZE) 				\
+       return REG_ESIZE;							\
+     bufp->allocated <<= 1;						\
+     if (bufp->allocated > MAX_BUF_SIZE)					\
+       bufp->allocated = MAX_BUF_SIZE; 					\
+     bufp->buffer = (unsigned char *) REALLOC(bufp->buffer, bufp->allocated);\
+     if (bufp->buffer == NULL)						\
+       return REG_ESPACE;						\
+     /* If the buffer moved, move all the pointers into it.  */		\
+     if (old_buffer != bufp->buffer)					\
+       {									\
+         b = (b - old_buffer) + bufp->buffer;				\
+         begalt = (begalt - old_buffer) + bufp->buffer;			\
+         if (fixup_alt_jump)						\
+           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+         if (laststart)							\
+           laststart = (laststart - old_buffer) + bufp->buffer;		\
+         if (pending_exact)						\
+           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+       }									\
+   } while (0)
+ 
+ 
+ /* Since we have one byte reserved for the register number argument to
+    {start,stop}_memory, the maximum number of groups we can report
+    things about is what fits in that byte.  */
+ #define MAX_REGNUM 255
+ 
+ /* But patterns can have more than `MAX_REGNUM' registers.  We just
+    ignore the excess.  */
+ typedef unsigned regnum_t;
+ 
+ 
+ /* Macros for the compile stack.  */
+ 
+ /* Since offsets can go either forwards or backwards, this type needs to
+    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
+ /* int may not be enough when sizeof(int) == 2.  */
+ typedef long pattern_offset_t;
+ 
+ typedef struct
+ {
+   pattern_offset_t begalt_offset;
+   pattern_offset_t fixup_alt_jump;
+   pattern_offset_t inner_group_offset;
+   pattern_offset_t laststart_offset;  
+   regnum_t regnum;
+ } compile_stack_elt_t;		/* Element type of compile_stack_type.stack.  */
+ 
+ 
+ typedef struct
+ {
+   compile_stack_elt_t *stack;		/* The element array itself.  */
+   unsigned size;			/* Number of elements allocated in `stack'.  */
+   unsigned avail;			/* Offset of next open position.  */
+ } compile_stack_type;
+ 
+ 
+ #define INIT_COMPILE_STACK_SIZE 32
+ 
+ #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
+ #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
+ 
+ /* The next available element.  */
+ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+ 
+ 
+ /* Set the bit for character C in the bitmap `b'; C may be any expression.  */
+ #define SET_LIST_BIT(c)                               \
+   (b[((unsigned char) (c)) / BYTEWIDTH]               \
+    |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+ 
+ 
+ /* Get the next unsigned number in the uncompiled pattern.  */
+ #define GET_UNSIGNED_NUMBER(num) 					\
+   { if (p != pend)							\
+      {									\
+        PATFETCH (c); 							\
+        while (ISDIGIT (c)) 						\
+          { 								\
+            if (num < 0)							\
+               num = 0;							\
+            num = num * 10 + c - '0'; 					\
+            if (p == pend) 						\
+               break; 							\
+            PATFETCH (c);						\
+          } 								\
+        } 								\
+     }		
+ 
+ #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
+ 
+ #define IS_CHAR_CLASS(string)						\
+    (STREQ (string, "alpha") || STREQ (string, "upper")			\
+     || STREQ (string, "lower") || STREQ (string, "digit")		\
+     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
+     || STREQ (string, "space") || STREQ (string, "print")		\
+     || STREQ (string, "punct") || STREQ (string, "graph")		\
+     || STREQ (string, "cntrl") || STREQ (string, "blank"))
+ 
+ /* Handle charset(_not)?.  set_list_bits adds the inclusive range C1..C2
+    to the charset whose bitmap starts at B (B[-1] holds sbc_size).
+    Structure of charset(_not)? in compiled pattern:
+ 
+      struct {
+        unsigned char id;		charset(_not)?
+        unsigned char sbc_size;
+        unsigned char sbc_map[sbc_size];	same as original up to here.
+        unsigned short mbc_size;		number of intervals.
+        struct {
+ 	 unsigned short beg;		beginning of interval.
+ 	 unsigned short end;		end of interval.
+        } intervals[mbc_size];
+      }; */
+ 
+ static reg_errcode_t
+ #ifdef __STDC__
+ set_list_bits (unsigned short c1, unsigned short c2,
+ 	       reg_syntax_t syntax, unsigned char *b, const char *translate)
+ #else
+ set_list_bits (c1, c2, syntax, b, translate)
+      unsigned short c1, c2;
+      reg_syntax_t syntax;
+      unsigned char *b;
+      const char *translate;
+ #endif
+ {
+   unsigned char sbc_size = b[-1];
+   unsigned short mbc_size = EXTRACT_UNSIGNED (&b[sbc_size]);
+   unsigned short beg, end, upb;
+ 
+   if (c1 > c2)				/* Empty range.  */
+     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+   if (c1 < 1 << BYTEWIDTH) {		/* Range starts at a single-byte char.  */
+     upb = c2;
+     if (1 << BYTEWIDTH <= upb)
+       upb = (1 << BYTEWIDTH) - 1;	/* The last single-byte char */
+     if (sbc_size <= upb / BYTEWIDTH) {
+       /* Allocate maximum size so it never happens again.  */
+       /* NOTE: memcpy() would not work here.  */
+       bcopy (&b[sbc_size], &b[(1 << BYTEWIDTH) / BYTEWIDTH], 2 + mbc_size*4);
+       bzero (&b[sbc_size], (1 << BYTEWIDTH) / BYTEWIDTH - sbc_size);
+       b[-1] = sbc_size = (1 << BYTEWIDTH) / BYTEWIDTH;
+     }
+     if (!translate) {
+       for (; c1 <= upb; c1++)
+ 	if (!ismbchar (c1))
+ 	  SET_LIST_BIT (c1);
+     }
+     else
+       for (; c1 <= upb; c1++)
+ 	if (!ismbchar (c1))
+ 	  SET_LIST_BIT (TRANSLATE (c1));
+     if (c2 < 1 << BYTEWIDTH)
+       return REG_NOERROR;
+     c1 = 0x8000;			/* The first wide char */
+   }
+   b = &b[sbc_size + 2];			/* B now points at intervals[0].  */
+ 
+   /*                             intervals[beg]
+          ●----------●          ●----------●
+                          c1
+                        ○----------------------●
+ 
+      Find beg: the first interval that ends at or after C1 - 1 (see figure).  */
+   for (beg = 0, upb = mbc_size; beg < upb; ) {
+     unsigned short mid = (beg + upb) >> 1;
+ 
+     if (c1 - 1 > EXTRACT_MBC (&b[mid*4 + 2]))
+       beg = mid + 1;
+     else
+       upb = mid;
+   }
+ 
+   /*                        intervals[end]
+            ●-------●      ●----------●
+                         c2
+          ●---------------○
+ 
+      Find end: one past the last interval starting at or before C2 + 1.  */
+   for (end = beg, upb = mbc_size; end < upb; ) {
+     unsigned short mid = (end + upb) >> 1;
+ 
+     if (c2 >= EXTRACT_MBC (&b[mid*4]) - 1)
+       end = mid + 1;
+     else
+       upb = mid;
+   }
+ 
+   if (beg != end) {
+     /* At least one existing interval is being merged, so widen the
+        new interval's start and end points to cover what it absorbs.  */
+     if (c1 > EXTRACT_MBC (&b[beg*4]))
+       c1 = EXTRACT_MBC (&b[beg*4]);
+     if (c2 < EXTRACT_MBC (&b[end*4 - 2]))
+       c2 = EXTRACT_MBC (&b[end*4 - 2]);
+   }
+   if (end < mbc_size && end != beg + 1)
+     /* Move the remaining existing intervals to just after the new one.  */
+     /* NOTE: memcpy() would not work here.  */
+     bcopy (&b[end*4], &b[(beg + 1)*4], (mbc_size - end)*4);
+   STORE_MBC (&b[beg*4 + 0], c1);
+   STORE_MBC (&b[beg*4 + 2], c2);
+   mbc_size += beg + 1 - end;
+   STORE_NUMBER (&b[-2], mbc_size);
+   return REG_NOERROR;
+ }
+ 
+ /* Return nonzero if C matches the charset(_not)? whose size byte is at B.  */
+ static int
+ #ifdef __STDC__
+ is_in_list (unsigned short c, const unsigned char *b)
+ #else
+ is_in_list (c, b)
+      unsigned short c;
+      const unsigned char *b;
+ #endif
+ {
+   /* A hit below flips this initial answer (nonzero for charset_not).  */
+   int in = (re_opcode_t) b[-1] == charset_not;
+   unsigned short size = *b++;
+   unsigned short lo = 0, hi;
+ 
+   if (c < 1 << BYTEWIDTH)
+     {
+       /* Single-byte character: test its bit in the bitmap.  */
+       if (c / BYTEWIDTH < size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH)
+         in = !in;
+       return in;
+     }
+ 
+   /* Multi-byte character: step over the bitmap and the interval count.  */
+   b += size + 2;
+   size = EXTRACT_UNSIGNED (&b[-2]);
+ 
+   /* Binary search for the first interval whose end is not below C.  */
+   hi = size;
+   while (lo < hi)
+     {
+       unsigned short mid = (lo + hi) >> 1;
+       if (c > EXTRACT_MBC (&b[mid*4 + 2]))
+         lo = mid + 1;
+       else
+         hi = mid;
+     }
+ 
+   /* A hit needs C inside that interval.  Multi-byte characters whose
+      second byte is '\n' or '\0' are treated as invalid: they are kept
+      out of [...] and, conversely, always match [^...].  */
+   if (lo < size && EXTRACT_MBC (&b[lo*4]) <= c
+       && (unsigned char) c != '\n' && (unsigned char) c != '\0')
+     in = !in;
+   return in;
+ }
+ 
+ static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
+ 						compile_stack,
+ 						regnum_t regnum));
+ 
+ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+    Returns one of error codes defined in `regex.h', or zero for success.
+ 
+    Assumes the `allocated' (and perhaps `buffer') and `translate'
+    fields are set in BUFP on entry.
+ 
+    If it succeeds, results are put in BUFP (if it returns an error, the
+    contents of BUFP are undefined):
+      `buffer' is the compiled pattern;
+      `syntax' is set to SYNTAX;
+      `used' is set to the length of the compiled pattern;
+      `fastmap_accurate' is zero;
+      `re_nsub' is the number of subexpressions in PATTERN;
+      `not_bol' and `not_eol' are zero;
+    
+    The `fastmap' and `newline_anchor' fields are neither
+    examined nor set.  */
+ 
+ static reg_errcode_t
+ regex_compile (pattern, size, syntax, bufp)
+      const char *pattern;
+      size_t size;
+      reg_syntax_t syntax;
+      struct re_pattern_buffer *bufp;
+ {
+   /* We fetch characters from PATTERN here.  Even though PATTERN is
+      `char *' (i.e., signed), we declare these variables as unsigned, so
+      they can be reliably used as array indices.  */
+   register unsigned char c, c1;
+   
+   /* A random temporary spot in PATTERN.  */
+   const char *p1;
+ 
+   /* Points to the end of the buffer, where we should append.  */
+   register unsigned char *b;
+   
+   /* Keeps track of unclosed groups.  */
+   compile_stack_type compile_stack;
+ 
+   /* Points to the current (ending) position in the pattern.  */
+   const char *p = pattern;
+   const char *pend = pattern + size;
+   
+   /* How to translate the characters in the pattern.  */
+   char *translate = bufp->translate;
+ 
+   /* Address of the count-byte of the most recently inserted `exactn'
+      command.  This makes it possible to tell if a new exact-match
+      character can be added to that command or if the character requires
+      a new `exactn' command.  */
+   unsigned char *pending_exact = 0;
+ 
+   /* Address of start of the most recently finished expression.
+      This tells, e.g., postfix * where to find the start of its
+      operand.  Reset at the beginning of groups and alternatives.  */
+   unsigned char *laststart = 0;
+ 
+   /* Address of beginning of regexp, or inside of last group.  */
+   unsigned char *begalt;
+ 
+   /* Place in the uncompiled pattern (i.e., the {) to
+      which to go back if the interval is invalid.  */
+   const char *beg_interval;
+                 
+   /* Address of the place where a forward jump should go to the end of
+      the containing expression.  Each alternative of an `or' -- except the
+      last -- ends with a forward jump of this sort.  */
+   unsigned char *fixup_alt_jump = 0;
+ 
+   /* Counts open-groups as they are encountered.  Remembered for the
+      matching close-group on the compile stack, so the same register
+      number is put in the stop_memory as the start_memory.  */
+   regnum_t regnum = 0;
+ 
+ #ifdef DEBUG
+   DEBUG_PRINT1 ("\nCompiling pattern: ");
+   if (debug)
+     {
+       unsigned debug_count;
+       
+       for (debug_count = 0; debug_count < size; debug_count++)
+         printchar (pattern[debug_count]);
+       putchar ('\n');
+     }
+ #endif /* DEBUG */
+ 
+   /* Initialize the compile stack.  */
+   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+   if (compile_stack.stack == NULL)
+     return REG_ESPACE;
+ 
+   compile_stack.size = INIT_COMPILE_STACK_SIZE;
+   compile_stack.avail = 0;
+ 
+   /* Initialize the pattern buffer.  */
+   bufp->syntax = syntax;
+   bufp->fastmap_accurate = 0;
+   bufp->not_bol = bufp->not_eol = 0;
+ 
+   /* Set `used' to zero, so that if we return an error, the pattern
+      printer (for debugging) will think there's no pattern.  We reset it
+      at the end.  */
+   bufp->used = 0;
+   
+   /* Always count groups, whether or not bufp->no_sub is set.  */
+   bufp->re_nsub = 0;				
+ 
+ #if !defined (emacs) && !defined (SYNTAX_TABLE)
+   /* Initialize the syntax table.  */
+    init_syntax_once ();
+ #endif
+ 
+   if (bufp->allocated == 0)
+     {
+       if (bufp->buffer)
+ 	{ /* If zero allocated, but buffer is non-null, try to realloc
+              enough space.  This loses if buffer's address is bogus, but
+              that is the user's responsibility.  */
+           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+         }
+       else
+         { /* Caller did not allocate a buffer.  Do it for them.  */
+           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+         }
+       if (!bufp->buffer) return REG_ESPACE;
+ 
+       bufp->allocated = INIT_BUF_SIZE;
+     }
+ 
+   begalt = b = bufp->buffer;
+ 
+   /* Loop through the uncompiled pattern until we're at the end.  */
+   while (p != pend)
+     {
+       PATFETCH (c);
+ 
+       switch (c)
+         {
+         case '^':
+           {
+             if (   /* If at start of pattern, it's an operator.  */
+                    p == pattern + 1
+                    /* If context independent, it's an operator.  */
+                 || syntax & RE_CONTEXT_INDEP_ANCHORS
+                    /* Otherwise, depends on what's come before.  */
+                 || at_begline_loc_p (pattern, p, syntax))
+               BUF_PUSH (begline);
+             else
+               goto normal_char;
+           }
+           break;
+ 
+ 
+         case '$':
+           {
+             if (   /* If at end of pattern, it's an operator.  */
+                    p == pend 
+                    /* If context independent, it's an operator.  */
+                 || syntax & RE_CONTEXT_INDEP_ANCHORS
+                    /* Otherwise, depends on what's next.  */
+                 || at_endline_loc_p (p, pend, syntax))
+                BUF_PUSH (endline);
+              else
+                goto normal_char;
+            }
+            break;
+ 
+ 
+ 	case '+':
+         case '?':
+           if ((syntax & RE_BK_PLUS_QM)
+               || (syntax & RE_LIMITED_OPS))
+             goto normal_char;
+         handle_plus:
+         case '*':
+           /* If there is no previous pattern... */
+           if (!laststart)
+             {
+               if (syntax & RE_CONTEXT_INVALID_OPS)
+                 return REG_BADRPT;
+               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+                 goto normal_char;
+             }
+ 
+           {
+             /* Are we optimizing this jump?  */
+             boolean keep_string_p = false;
+             
+             /* 1 means zero (many) matches is allowed.  */
+             char zero_times_ok = 0, many_times_ok = 0;
+ 
+             /* If there is a sequence of repetition chars, collapse it
+                down to just one (the right one).  We can't combine
+                interval operators with these because of, e.g., `a{2}*',
+                which should only match an even number of `a's.  */
+ 
+             for (;;)
+               {
+                 zero_times_ok |= c != '+';
+                 many_times_ok |= c != '?';
+ 
+                 if (p == pend)
+                   break;
+ 
+                 PATFETCH (c);
+ 
+                 if (c == '*'
+                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+                   ;
+ 
+                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
+                   {
+                     if (p == pend) return REG_EESCAPE;
+ 
+                     PATFETCH (c1);
+                     if (!(c1 == '+' || c1 == '?'))
+                       {
+                         PATUNFETCH;
+                         PATUNFETCH;
+                         break;
+                       }
+ 
+                     c = c1;
+                   }
+                 else
+                   {
+                     PATUNFETCH;
+                     break;
+                   }
+ 
+                 /* If we get here, we found another repeat character.  */
+                }
+ 
+             /* Star, etc. applied to an empty pattern is equivalent
+                to an empty pattern.  */
+             if (!laststart)  
+               break;
+ 
+             /* Now we know whether or not zero matches is allowed
+                and also whether or not two or more matches is allowed.  */
+             if (many_times_ok)
+               { /* More than one repetition is allowed, so put in at the
+                    end a backward relative jump from `b' to before the next
+                    jump we're going to put in below (which jumps from
+                    laststart to after this jump).  
+ 
+                    But if we are at the `*' in the exact sequence `.*\n',
+                    insert an unconditional jump backwards to the .,
+                    instead of the beginning of the loop.  This way we only
+                    push a failure point once, instead of every time
+                    through the loop.  */
+                 assert (p - 1 > pattern);
+ 
+                 /* Allocate the space for the jump.  */
+                 GET_BUFFER_SPACE (3);
+ 
+                 /* We know we are not at the first character of the pattern,
+                    because laststart was nonzero.  And we've already
+                    incremented `p', by the way, to be the character after
+                    the `*'.  Do we have to do something analogous here
+                    for null bytes, because of RE_DOT_NOT_NULL?  */
+                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ 		    && zero_times_ok
+                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                     && !(syntax & RE_DOT_NEWLINE))
+                   { /* We have .*\n.  */
+                     STORE_JUMP (jump, b, laststart);
+                     keep_string_p = true;
+                   }
+                 else
+                   /* Anything else.  */
+                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+ 
+                 /* We've added more stuff to the buffer.  */
+                 b += 3;
+               }
+ 
+             /* On failure, jump from laststart to b + 3, which will be the
+                end of the buffer after this jump is inserted.  */
+             GET_BUFFER_SPACE (3);
+             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+                                        : on_failure_jump,
+                          laststart, b + 3);
+             pending_exact = 0;
+             b += 3;
+ 
+             if (!zero_times_ok)
+               {
+                 /* At least one repetition is required, so insert a
+                    `dummy_failure_jump' before the initial
+                    `on_failure_jump' instruction of the loop. This
+                    effects a skip over that instruction the first time
+                    we hit that loop.  */
+                 GET_BUFFER_SPACE (3);
+                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+                 b += 3;
+               }
+             }
+ 	  break;
+ 
+ 
+ 	case '.':
+           laststart = b;
+           BUF_PUSH (anychar);
+           break;
+ 
+ 
+         case '[':
+           {
+             boolean had_char_class = false;
+ 	    unsigned short c, c1;
+ 	    int last_char = -1;
+ 
+             if (p == pend) return REG_EBRACK;
+ 
+             /* Ensure that we have enough space to push a charset: the
+                opcode, the length count, and the bitset; 34 bytes in all.  */
+ 	    /* + 2 + 4 for mbcharset(_not)? with just one interval.  */
+ 	    GET_BUFFER_SPACE (34 + 2 + 4);
+ 
+             laststart = b;
+ 
+             /* We test `*p == '^' twice, instead of using an if
+                statement, so we only need one BUF_PUSH.  */
+             BUF_PUSH (*p == '^' ? charset_not : charset); 
+             if (*p == '^')
+               p++;
+ 
+             /* Remember the first position in the bracket expression.  */
+             p1 = p;
+ 
+             /* Push the number of bytes in the bitmap.  */
+             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+ 
+             /* Clear the whole map.  */
+ 	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
+ 
+             /* charset_not matches newline according to a syntax bit.  */
+             if ((re_opcode_t) b[-2] == charset_not
+                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+               SET_LIST_BIT ('\n');
+ 
+             /* Read in characters and ranges, setting map bits.  */
+             for (;;)
+               {
+ 		int size;
+ 
+                 if (p == pend) return REG_EBRACK;
+ 
+ 		if ((size = EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])))
+ 		  /* Ensure the space is enough to hold another interval
+ 		     of multi-byte chars in charset(_not)?.  */
+ 		  GET_BUFFER_SPACE (32 + 2 + size*4 + 4);
+ 
+ 		PATFETCH_RAW (c);
+ 
+                 /* \ might escape characters inside [...] and [^...].  */
+                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+                   {
+                     if (p == pend) return REG_EESCAPE;
+ 
+ 		    PATFETCH_RAW (c1);
+ 		    if (ismbchar (c1)) {
+ 		      unsigned char c2;
+ 
+ 		      PATFETCH_RAW (c2);
+ 		      c1 = c1 << 8 | c2;
+ 		      (void) set_list_bits (c1, c1, syntax, b, translate);
+ 		      last_char = c1;
+ 		      continue;
+ 		    }
+ 		    SET_LIST_BIT (TRANSLATE (c1));
+ 		    last_char = c1;
+                     continue;
+                   }
+ 
+                 /* Could be the end of the bracket expression.  If it's
+                    not (i.e., when the bracket expression is `[]' so
+                    far), the ']' character bit gets set way below.  */
+                 if (c == ']' && p != p1 + 1)
+                   break;
+ 
+                 /* Look ahead to see if it's a range when the last thing
+                    was a character class.  */
+                 if (had_char_class && c == '-' && *p != ']')
+                   return REG_ERANGE;
+ 
+ 		if (ismbchar (c)) {
+ 		  unsigned char c2;
+ 
+ 		  PATFETCH_RAW (c2);
+ 		  c = c << 8 | c2;
+ 		}
+ 
+                 /* Look ahead to see if it's a range when the last thing
+                    was a character: if this is a hyphen not at the
+                    beginning or the end of a list, then it's the range
+                    operator.  */
+                 if (c == '-' 
+ #if 0 /* The original was: */
+                     && !(p - 2 >= pattern && p[-2] == '[') 
+                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ #else /* I wonder why he did not write like this.
+ 	 Have we got any problems?  */
+ 		    && p != p1 + 1
+ #endif
+                     && *p != ']')
+                   {
+ 		    reg_errcode_t ret;
+ 
+ 		    assert (last_char >= 0);
+ 		    PATFETCH_RAW (c1);
+ 		    if (ismbchar (c1)) {
+ 		      unsigned char c2;
+ 
+ 		      PATFETCH_RAW (c2);
+ 		      c1 = c1 << 8 | c2;
+ 		    }
+ 		    ret = set_list_bits (last_char, c1, syntax, b, translate);
+ 		    last_char = c1;
+                     if (ret != REG_NOERROR) return ret;
+                   }
+ 
+                 else if (p[0] == '-' && p[1] != ']')
+                   { /* This handles ranges made up of characters only.  */
+                     reg_errcode_t ret;
+ 
+ 		    /* Move past the `-'.  */
+ 		    PATFETCH_RAW (c1);
+                     
+ 		    PATFETCH_RAW (c1);
+ 		    if (ismbchar (c1)) {
+ 		      unsigned char c2;
+ 
+ 		      PATFETCH_RAW (c2);
+ 		      c1 = c1 << 8 | c2;
+ 		    }
+ 		    ret = set_list_bits (c, c1, syntax, b, translate);
+ 		    last_char = c1;
+                     if (ret != REG_NOERROR) return ret;
+                   }
+ 
+                 /* See if we're at the beginning of a possible character
+                    class.  */
+ 
+                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                   { /* Leave room for the null.  */
+                     char str[CHAR_CLASS_MAX_LENGTH + 1];
+ 
+ 		    PATFETCH_RAW (c);
+                     c1 = 0;
+ 
+                     /* If pattern is `[[:'.  */
+                     if (p == pend) return REG_EBRACK;
+ 
+                     for (;;)
+                       {
+                         PATFETCH (c);
+                         if (c == ':' || c == ']' || p == pend
+                             || c1 == CHAR_CLASS_MAX_LENGTH)
+                           break;
+                         str[c1++] = c;
+                       }
+                     str[c1] = '\0';
+ 
+                     /* If it isn't a word bracketed by `[:' and `:]':
+                        undo the ending character, the letters, and leave
+                        the leading `:' and `[' (but set bits for them).  */
+                     if (c == ':' && *p == ']')
+                       {
+                         int ch;
+                         boolean is_alnum = STREQ (str, "alnum");
+                         boolean is_alpha = STREQ (str, "alpha");
+                         boolean is_blank = STREQ (str, "blank");
+                         boolean is_cntrl = STREQ (str, "cntrl");
+                         boolean is_digit = STREQ (str, "digit");
+                         boolean is_graph = STREQ (str, "graph");
+                         boolean is_lower = STREQ (str, "lower");
+                         boolean is_print = STREQ (str, "print");
+                         boolean is_punct = STREQ (str, "punct");
+                         boolean is_space = STREQ (str, "space");
+                         boolean is_upper = STREQ (str, "upper");
+                         boolean is_xdigit = STREQ (str, "xdigit");
+                         
+                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+ 
+                         /* Throw away the ] at the end of the character
+                            class.  */
+                         PATFETCH (c);					
+ 
+                         if (p == pend) return REG_EBRACK;
+ 
+                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+                           {
+                             if (   (is_alnum  && ISALNUM (ch))
+                                 || (is_alpha  && ISALPHA (ch))
+                                 || (is_blank  && ISBLANK (ch))
+                                 || (is_cntrl  && ISCNTRL (ch))
+                                 || (is_digit  && ISDIGIT (ch))
+                                 || (is_graph  && ISGRAPH (ch))
+                                 || (is_lower  && ISLOWER (ch))
+                                 || (is_print  && ISPRINT (ch))
+                                 || (is_punct  && ISPUNCT (ch))
+                                 || (is_space  && ISSPACE (ch))
+                                 || (is_upper  && ISUPPER (ch))
+                                 || (is_xdigit && ISXDIGIT (ch)))
+                             SET_LIST_BIT (ch);
+                           }
+                         had_char_class = true;
+ #ifdef DEBUG
+ 			last_char = -1;
+ #endif
+                       }
+                     else
+                       {
+                         c1++;
+                         while (c1--)    
+                           PATUNFETCH;
+ #if 0 /* The original was: */
+                         SET_LIST_BIT ('[');
+                         SET_LIST_BIT (':');
+ #else /* I think this is the right way.  */
+ 			SET_LIST_BIT (TRANSLATE ('['));
+ 			SET_LIST_BIT (TRANSLATE (':'));
+ #endif
+                         had_char_class = false;
+ 			last_char = ':';
+                       }
+                   }
+                 else
+                   {
+                     had_char_class = false;
+ 		    (void) set_list_bits (c, c, syntax, b, translate);
+ 		    last_char = c;
+                   }
+               }
+ 
+             /* Discard any (non)matching list bytes that are all 0 at the
+                end of the map.  Decrease the map-length byte too.  */
+             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
+               b[-1]--; 
+ 	    if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
+ 	      bcopy (&b[(1 << BYTEWIDTH) / BYTEWIDTH], &b[b[-1]],
+ 		     2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*4);
+ 	    b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*4;
+ 	    break;
+           }
+           break;
+ 
+ 
+ 	case '(':
+           if (syntax & RE_NO_BK_PARENS)
+             goto handle_open;
+           else
+             goto normal_char;
+ 
+ 
+         case ')':
+           if (syntax & RE_NO_BK_PARENS)
+             goto handle_close;
+           else
+             goto normal_char;
+ 
+ 
+         case '\n':
+           if (syntax & RE_NEWLINE_ALT)
+             goto handle_alt;
+           else
+             goto normal_char;
+ 
+ 
+ 	case '|':
+           if (syntax & RE_NO_BK_VBAR)
+             goto handle_alt;
+           else
+             goto normal_char;
+ 
+ 
+         case '{':
+            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+              goto handle_interval;
+            else
+              goto normal_char;
+ 
+ 
+         case '\\':
+           if (p == pend) return REG_EESCAPE;
+ 
+           /* Do not translate the character after the \, so that we can
+              distinguish, e.g., \B from \b, even if we normally would
+              translate, e.g., B to b.  */
+           PATFETCH_RAW (c);
+ 
+           switch (c)
+             {
+             case '(':
+               if (syntax & RE_NO_BK_PARENS)
+                 goto normal_backslash;
+ 
+             handle_open:
+               bufp->re_nsub++;
+               regnum++;
+ 
+               if (COMPILE_STACK_FULL)
+                 { 
+                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
+                             compile_stack_elt_t);
+                   if (compile_stack.stack == NULL) return REG_ESPACE;
+ 
+                   compile_stack.size <<= 1;
+                 }
+ 
+               /* These are the values to restore when we hit end of this
+                  group.  They are all relative offsets, so that if the
+                  whole pattern moves because of realloc, they will still
+                  be valid.  */
+               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+               COMPILE_STACK_TOP.fixup_alt_jump 
+                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+               COMPILE_STACK_TOP.regnum = regnum;
+ 
+               /* We will eventually replace the 0 with the number of
+                  groups inner to this one.  But do not push a
+                  start_memory for groups beyond the last one we can
+                  represent in the compiled pattern.  */
+               if (regnum <= MAX_REGNUM)
+                 {
+                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+                   BUF_PUSH_3 (start_memory, regnum, 0);
+                 }
+                 
+               compile_stack.avail++;
+ 
+               fixup_alt_jump = 0;
+               laststart = 0;
+               begalt = b;
+ 	      /* If we've reached MAX_REGNUM groups, then this open
+ 		 won't actually generate any code, so we'll have to
+ 		 clear pending_exact explicitly.  */
+ 	      pending_exact = 0;
+               break;
+ 
+ 
+             case ')':
+               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+ 
+               if (COMPILE_STACK_EMPTY)
+                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+                   goto normal_backslash;
+                 else
+                   return REG_ERPAREN;
+ 
+             handle_close:
+               if (fixup_alt_jump)
+                 { /* Push a dummy failure point at the end of the
+                      alternative for a possible future
+                      `pop_failure_jump' to pop.  See comments at
+                      `push_dummy_failure' in `re_match_2'.  */
+                   BUF_PUSH (push_dummy_failure);
+                   
+                   /* We allocated space for this jump when we assigned
+                      to `fixup_alt_jump', in the `handle_alt' case below.  */
+                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+                 }
+ 
+               /* See similar code for backslashed left paren above.  */
+               if (COMPILE_STACK_EMPTY)
+                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+                   goto normal_char;
+                 else
+                   return REG_ERPAREN;
+ 
+               /* Since we just checked for an empty stack above, this
+                  ``can't happen''.  */
+               assert (compile_stack.avail != 0);
+               {
+                 /* We don't just want to restore into `regnum', because
+                    later groups should continue to be numbered higher,
+                    as in `(ab)c(de)' -- the second group is #2.  */
+                 regnum_t this_group_regnum;
+ 
+                 compile_stack.avail--;		
+                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+                 fixup_alt_jump
+                   = COMPILE_STACK_TOP.fixup_alt_jump
+                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 
+                     : 0;
+                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+                 this_group_regnum = COMPILE_STACK_TOP.regnum;
+ 		/* If we've reached MAX_REGNUM groups, then this close
+ 		   won't actually generate any code, so we'll have to
+ 		   clear pending_exact explicitly.  */
+ 		pending_exact = 0;
+ 
+                 /* We're at the end of the group, so now we know how many
+                    groups were inside this one.  */
+                 if (this_group_regnum <= MAX_REGNUM)
+                   {
+                     unsigned char *inner_group_loc
+                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+                     
+                     *inner_group_loc = regnum - this_group_regnum;
+                     BUF_PUSH_3 (stop_memory, this_group_regnum,
+                                 regnum - this_group_regnum);
+                   }
+               }
+               break;
+ 
+ 
+             case '|':					/* `\|'.  */
+               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+                 goto normal_backslash;
+             handle_alt:
+               if (syntax & RE_LIMITED_OPS)
+                 goto normal_char;
+ 
+               /* Insert before the previous alternative a jump which
+                  jumps to this alternative if the former fails.  */
+               GET_BUFFER_SPACE (3);
+               INSERT_JUMP (on_failure_jump, begalt, b + 6);
+               pending_exact = 0;
+               b += 3;
+ 
+               /* The alternative before this one has a jump after it
+                  which gets executed if it gets matched.  Adjust that
+                  jump so it will jump to this alternative's analogous
+                  jump (put in below, which in turn will jump to the next
+                  (if any) alternative's such jump, etc.).  The last such
+                  jump jumps to the correct final destination.  A picture:
+                           _____ _____ 
+                           |   | |   |   
+                           |   v |   v 
+                          a | b   | c   
+ 
+                  If we are at `b', then fixup_alt_jump right now points to a
+                  three-byte space after `a'.  We'll put in the jump, set
+                  fixup_alt_jump to right after `b', and leave behind three
+                  bytes which we'll fill in when we get to after `c'.  */
+ 
+               if (fixup_alt_jump)
+                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+ 
+               /* Mark and leave space for a jump after this alternative,
+                  to be filled in later either by next alternative or
+                  when know we're at the end of a series of alternatives.  */
+               fixup_alt_jump = b;
+               GET_BUFFER_SPACE (3);
+               b += 3;
+ 
+               laststart = 0;
+               begalt = b;
+               break;
+ 
+ 
+             case '{': 
+               /* If \{ is a literal.  */
+               if (!(syntax & RE_INTERVALS)
+                      /* If we're at `\{' and it's not the open-interval 
+                         operator.  */
+                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+                   || (p - 2 == pattern  &&  p == pend))
+                 goto normal_backslash;
+ 
+             handle_interval:
+               {
+                 /* If got here, then the syntax allows intervals.  */
+ 
+                 /* At least (most) this many matches must be made.  */
+                 int lower_bound = -1, upper_bound = -1;
+ 
+                 beg_interval = p - 1;
+ 
+                 if (p == pend)
+                   {
+                     if (syntax & RE_NO_BK_BRACES)
+                       goto unfetch_interval;
+                     else
+                       return REG_EBRACE;
+                   }
+ 
+                 GET_UNSIGNED_NUMBER (lower_bound);
+ 
+                 if (c == ',')
+                   {
+                     GET_UNSIGNED_NUMBER (upper_bound);
+                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+                   }
+                 else
+                   /* Interval such as `{1}' => match exactly once. */
+                   upper_bound = lower_bound;
+ 
+                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+                     || lower_bound > upper_bound)
+                   {
+                     if (syntax & RE_NO_BK_BRACES)
+                       goto unfetch_interval;
+                     else 
+                       return REG_BADBR;
+                   }
+ 
+                 if (!(syntax & RE_NO_BK_BRACES)) 
+                   {
+                     if (c != '\\') return REG_EBRACE;
+ 
+                     PATFETCH (c);
+                   }
+ 
+                 if (c != '}')
+                   {
+                     if (syntax & RE_NO_BK_BRACES)
+                       goto unfetch_interval;
+                     else 
+                       return REG_BADBR;
+                   }
+ 
+                 /* We just parsed a valid interval.  */
+ 
+                 /* If it's invalid to have no preceding re.  */
+                 if (!laststart)
+                   {
+                     if (syntax & RE_CONTEXT_INVALID_OPS)
+                       return REG_BADRPT;
+                     else if (syntax & RE_CONTEXT_INDEP_OPS)
+                       laststart = b;
+                     else
+                       goto unfetch_interval;
+                   }
+ 
+                 /* If the upper bound is zero, don't want to succeed at
+                    all; jump from `laststart' to `b + 3', which will be
+                    the end of the buffer after we insert the jump.  */
+                  if (upper_bound == 0)
+                    {
+                      GET_BUFFER_SPACE (3);
+                      INSERT_JUMP (jump, laststart, b + 3);
+                      b += 3;
+                    }
+ 
+                  /* Otherwise, we have a nontrivial interval.  When
+                     we're all done, the pattern will look like:
+                       set_number_at <jump count> <upper bound>
+                       set_number_at <succeed_n count> <lower bound>
+                       succeed_n <after jump addr> <succeed_n count>
+                       <body of loop>
+                       jump_n <succeed_n addr> <jump count>
+                     (The upper bound and `jump_n' are omitted if
+                     `upper_bound' is 1, though.)  */
+                  else 
+                    { /* If the upper bound is > 1, we need to insert
+                         more at the end of the loop.  */
+                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
+ 
+                      GET_BUFFER_SPACE (nbytes);
+ 
+                      /* Initialize lower bound of the `succeed_n', even
+                         though it will be set during matching by its
+                         attendant `set_number_at' (inserted next),
+                         because `re_compile_fastmap' needs to know.
+                         Jump to the `jump_n' we might insert below.  */
+                      INSERT_JUMP2 (succeed_n, laststart,
+                                    b + 5 + (upper_bound > 1) * 5,
+                                    lower_bound);
+                      b += 5;
+ 
+                      /* Code to initialize the lower bound.  Insert 
+                         before the `succeed_n'.  The `5' is the last two
+                         bytes of this `set_number_at', plus 3 bytes of
+                         the following `succeed_n'.  */
+                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+                      b += 5;
+ 
+                      if (upper_bound > 1)
+                        { /* More than one repetition is allowed, so
+                             append a backward jump to the `succeed_n'
+                             that starts this interval.
+                             
+                             When we've reached this during matching,
+                             we'll have matched the interval once, so
+                             jump back only `upper_bound - 1' times.  */
+                          STORE_JUMP2 (jump_n, b, laststart + 5,
+                                       upper_bound - 1);
+                          b += 5;
+ 
+                          /* The location we want to set is the second
+                             parameter of the `jump_n'; that is `b-2' as
+                             an absolute address.  `laststart' will be
+                             the `set_number_at' we're about to insert;
+                             `laststart+3' the number to set, the source
+                             for the relative address.  But we are
+                             inserting into the middle of the pattern --
+                             so everything is getting moved up by 5.
+                             Conclusion: (b - 2) - (laststart + 3) + 5,
+                             i.e., b - laststart.
+                             
+                             We insert this at the beginning of the loop
+                             so that if we fail during matching, we'll
+                             reinitialize the bounds.  */
+                          insert_op2 (set_number_at, laststart, b - laststart,
+                                      upper_bound - 1, b);
+                          b += 5;
+                        }
+                    }
+                 pending_exact = 0;
+                 beg_interval = NULL;
+               }
+               break;
+ 
+             unfetch_interval:
+               /* If an invalid interval, match the characters as literals.  */
+                assert (beg_interval);
+                p = beg_interval;
+                beg_interval = NULL;
+ 
+                /* normal_char and normal_backslash need `c'.  */
+                PATFETCH (c);	
+ 
+                if (!(syntax & RE_NO_BK_BRACES))
+                  {
+                    if (p > pattern  &&  p[-1] == '\\')
+                      goto normal_backslash;
+                  }
+                goto normal_char;
+ 
+ #ifdef emacs
+             /* There is no way to specify the before_dot and after_dot
+                operators.  rms says this is ok.  --karl  */
+             case '=':
+               BUF_PUSH (at_dot);
+               break;
+ 
+             case 's':	
+               laststart = b;
+               PATFETCH (c);
+               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+               break;
+ 
+             case 'S':
+               laststart = b;
+               PATFETCH (c);
+               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+               break;
+ #endif /* emacs */
+ 
+ 
+             case 'w':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               laststart = b;
+               BUF_PUSH (wordchar);
+               break;
+ 
+ 
+             case 'W':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               laststart = b;
+               BUF_PUSH (notwordchar);
+               break;
+ 
+ 
+             case '<':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (wordbeg);
+               break;
+ 
+             case '>':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (wordend);
+               break;
+ 
+             case 'b':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (wordbound);
+               break;
+ 
+             case 'B':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (notwordbound);
+               break;
+ 
+             case '`':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (begbuf);
+               break;
+ 
+             case '\'':
+ 	      if (re_syntax_options & RE_NO_GNU_OPS)
+                goto normal_char;
+               BUF_PUSH (endbuf);
+               break;
+ 
+             case '1': case '2': case '3': case '4': case '5':
+             case '6': case '7': case '8': case '9':
+               if (syntax & RE_NO_BK_REFS)
+                 goto normal_char;
+ 
+               c1 = c - '0';
+ 
+               if (c1 > regnum)
+                 return REG_ESUBREG;
+ 
+               /* Can't back reference to a subexpression if inside of it.  */
+               if (group_in_compile_stack (compile_stack, (regnum_t)c1))
+                 goto normal_char;
+ 
+               laststart = b;
+               BUF_PUSH_2 (duplicate, c1);
+               break;
+ 
+ 
+             case '+':
+             case '?':
+               if (syntax & RE_BK_PLUS_QM)
+                 goto handle_plus;
+               else
+                 goto normal_backslash;
+ 
+             default:
+             normal_backslash:
+               /* You might think it would be useful for \ to mean
+                  not to translate; but if we don't translate it
+                  it will never match anything.  */
+ 	      if (!ismbchar (c))
+ 		c = TRANSLATE (c);
+               goto normal_char;
+             }
+           break;
+ 
+ 
+ 	default:
+         /* Expects the character in `c'.  */
+ 	normal_char:
+ 
+ 	  c1 = 0;
+ 	  if (ismbchar (c)) {
+ 	    c1 = c;
+ 	    PATFETCH_RAW (c);
+ 	  }
+ 
+ 	      /* If no exactn currently being built.  */
+           if (!pending_exact 
+ 
+               /* If last exactn not at current position.  */
+               || pending_exact + *pending_exact + 1 != b
+               
+               /* We have only one byte following the exactn for the count.  */
+ 	      || *pending_exact >= (c1 ? (1 << BYTEWIDTH) - 2
+ 				       : (1 << BYTEWIDTH) - 1)
+ 
+               /* If followed by a repetition operator.  */
+               || *p == '*' || *p == '^'
+ 	      || ((syntax & RE_BK_PLUS_QM)
+ 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ 		  : (*p == '+' || *p == '?'))
+ 	      || ((syntax & RE_INTERVALS)
+                   && ((syntax & RE_NO_BK_BRACES)
+ 		      ? *p == '{'
+                       : (p[0] == '\\' && p[1] == '{'))))
+ 	    {
+ 	      /* Start building a new exactn.  */
+               
+               laststart = b;
+ 
+ 	      BUF_PUSH_2 (exactn, 0);
+ 	      pending_exact = b - 1;
+             }
+             
+ 	  if (c1) {
+ 	    BUF_PUSH (c1);
+ 	    (*pending_exact)++;
+ 	  }
+ 	  BUF_PUSH (c);
+           (*pending_exact)++;
+ 	  break;
+         } /* switch (c) */
+     } /* while p != pend */
+ 
+   
+   /* Through the pattern now.  */
+   
+   if (fixup_alt_jump)
+     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+ 
+   if (!COMPILE_STACK_EMPTY) 
+     return REG_EPAREN;
+ 
+   free (compile_stack.stack);
+ 
+   /* We have succeeded; set the length of the buffer.  */
+   bufp->used = b - bufp->buffer;
+ 
+ #ifdef DEBUG
+   if (debug)
+     {
+       DEBUG_PRINT1 ("\nCompiled pattern: \n");
+       print_compiled_pattern (bufp);
+     }
+ #endif /* DEBUG */
+ 
+   return REG_NOERROR;
+ } /* regex_compile */
+ 
+ /* Subroutines for `regex_compile'.  */
+ 
+ /* Write the opcode OP into the byte at LOC, then hand LOC + 1 and ARG
+    to STORE_NUMBER to fill in the two-byte parameter that follows it.  */
+ static void
+ store_op1 (op, loc, arg)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+ {
+   loc[0] = (unsigned char) op;
+   STORE_NUMBER (loc + 1, arg);
+ }
+ 
+ 
+ /* Two-parameter form of `store_op1': OP goes into the byte at LOC, ARG1
+    is stored as a two-byte number at LOC + 1 and ARG2 at LOC + 3.  */
+ static void
+ store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+ {
+   loc[0] = (unsigned char) op;
+   STORE_NUMBER (loc + 1, arg1);
+   STORE_NUMBER (loc + 3, arg2);
+ }
+ 
+ 
+ /* Shift the bytes in [LOC, END) up by three, working from the top down
+    since the two regions overlap, then store OP and its two-byte
+    parameter ARG in the gap opened at LOC.  */
+ static void
+ insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+     unsigned char *end;
+ {
+   register unsigned char *src;
+ 
+   /* SRC steps backwards from END; each pass moves the byte below it.  */
+   for (src = end; src != loc; src--)
+     src[2] = src[-1];
+ 
+   store_op1 (op, loc, arg);
+ }
+ 
+ 
+ /* Five-byte counterpart of `insert_op1': shift the bytes in [LOC, END) up
+    by five, then store OP with two-byte parameters ARG1 and ARG2 at LOC.  */
+ static void
+ insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+     unsigned char *end;
+ {
+   register unsigned char *src;
+ 
+   /* Copy top-down so no byte is overwritten before it has been moved.  */
+   for (src = end; src != loc; src--)
+     src[4] = src[-1];
+ 
+   store_op2 (op, loc, arg1, arg2);
+ }
+ 
+ 
+ /* PATTERN is the regexp being compiled and P points just past a `^' in
+    it.  Return true if the character before that `^' is an open-group or
+    alternation operator under SYNTAX.  Reads P[-2], so the caller must
+    ensure at least one character precedes the `^'.  */
+ static boolean
+ at_begline_loc_p (pattern, p, syntax)
+     const char *pattern, *p;
+     reg_syntax_t syntax;
+ {
+   const char *before = p - 2;
+   boolean escaped = before > pattern && before[-1] == '\\';
+ 
+   /* `(' opens a group when unescaped parens are operators or when it
+      is backslashed; likewise `|' for alternation.  */
+   if (*before == '(')
+     return (syntax & RE_NO_BK_PARENS) || escaped;
+   return *before == '|' && ((syntax & RE_NO_BK_VBAR) || escaped);
+ }
+ 
+ 
+ /* The dual of at_begline_loc_p, for `$'.  P points just past the `$' and
+    PEND marks the end of the pattern; the caller must ensure P < PEND.
+    Return true if what follows is a close-group or an alternation
+    operator, in whichever spelling SYNTAX selects.  */
+ static boolean
+ at_endline_loc_p (p, pend, syntax)
+     const char *p, *pend;
+     reg_syntax_t syntax;
+ {
+   /* The character after a backslash at P, or 0 when P holds no
+      backslash or nothing follows it before PEND.  */
+   int escaped = (*p == '\\' && p + 1 < pend) ? p[1] : 0;
+   boolean before_close
+     = (syntax & RE_NO_BK_PARENS) ? *p == ')' : escaped == ')';
+   boolean before_alt
+     = (syntax & RE_NO_BK_VBAR) ? *p == '|' : escaped == '|';
+ 
+   /* Either operator right after the `$' qualifies.  */
+   return before_close || before_alt;
+ }
+ 
+ 
+ /* Search the entries of COMPILE_STACK that are in use (indices below
+    its `avail' count) for one whose `regnum' field equals REGNUM.
+    Return true if there is such an entry and false otherwise.  */
+ static boolean
+ group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+ {
+   int remaining = compile_stack.avail;
+ 
+   /* Walk from the most recently pushed entry down to the first one.  */
+   while (remaining-- > 0)
+     {
+       if (compile_stack.stack[remaining].regnum == regnum)
+         return true;
+     }
+   return false;
+ }
+ 
+ 
+ #if 0 /* We use set_list_bits() now.  */
+ /* Disabled code, superseded by set_list_bits().  Read the ending
+    character of a range (in a bracket expression) from the uncompiled
+    pattern *P_PTR (which ends at PEND) and advance *P_PTR past it.  The
+    starting character is taken from `P[-2]'; `P[-1]' is the `-'.  Then
+    set the bit for the translation of every character from the start
+    through the end (inclusive) in the compiled pattern B.
+ 
+    Return REG_ERANGE if the pattern is already at PEND, or if the range
+    is empty and SYNTAX has RE_NO_EMPTY_RANGES; otherwise REG_NOERROR.
+    The short variable names let us reuse `regex_compile's macros.  */
+ 
+ static reg_errcode_t
+ compile_range (p_ptr, pend, translate, syntax, b)
+     const char **p_ptr, *pend;
+     char *translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+ {
+   unsigned this_char;
+ 
+   const char *p = *p_ptr;
+   int range_start, range_end;
+   
+   if (p == pend)
+     return REG_ERANGE;
+ 
+   /* Even though the pattern is a signed `char *', we need to fetch
+      with unsigned char *'s; if the high bit of the pattern character
+      is set, the range endpoints will be negative if we fetch using a
+      signed char *.
+ 
+      We also want to fetch the endpoints without translating them; the 
+      appropriate translation is done in the bit-setting loop below.  */
+   range_start = ((unsigned char *) p)[-2];
+   range_end   = ((unsigned char *) p)[0];
+ 
+   /* Have to increment the pointer into the pattern string, so the
+      caller isn't still at the ending character.  */
+   (*p_ptr)++;
+ 
+   /* If the start is after the end, the range is empty.  */
+   if (range_start > range_end)
+     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+ 
+   /* Here we see why `this_char' has to be larger than an `unsigned
+      char' -- the range is inclusive, so if `range_end' == 0xff
+      (assuming 8-bit characters), we would otherwise go into an infinite
+      loop, since all characters <= 0xff.  */
+   for (this_char = range_start; this_char <= range_end; this_char++)
+     {
+       SET_LIST_BIT (TRANSLATE (this_char));
+     }
+   
+   return REG_NOERROR;
+ }
+ #endif
+ 
+ /* Failure stack declarations and macros; both re_compile_fastmap and
+    re_match_2 use a failure stack.  These have to be macros because of
+    REGEX_ALLOCATE.  */
+    
+ 
+ /* Number of failure points for which to initially allocate space
+    when matching.  If this number is exceeded, we allocate more
+    space, so it is not a hard limit.  */
+ #ifndef INIT_FAILURE_ALLOC
+ #define INIT_FAILURE_ALLOC 5
+ #endif
+ 
+ /* Roughly the maximum number of failure points on the stack.  Would be
+    exactly that if always used MAX_FAILURE_SPACE each time we failed.
+    This is a variable only so users of regex can assign to it; we never
+    change it ourselves.  */
+ int re_max_failures = 2000;
+ /* Type of one item stored on the failure stack.  */
+ typedef const unsigned char *fail_stack_elt_t;
+ 
+ typedef struct
+ {
+   fail_stack_elt_t *stack;		/* Array holding the pushed items.  */
+   unsigned size;			/* Number of items `stack' has room for.  */
+   unsigned avail;			/* Offset of next open position.  */
+ } fail_stack_type;
+ /* These read the variable `fail_stack' (or `fail_stack_ptr') in scope.  */
+ #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
+ #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+ #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
+ #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
+ 
+ 
+ /* Initialize `fail_stack' with room for INIT_FAILURE_ALLOC items.  */
+ /* Does `return -2' from the enclosing function if the alloc fails.  */
+ #define INIT_FAIL_STACK()						\
+   do {									\
+     fail_stack.stack = (fail_stack_elt_t *)				\
+       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
+ 									\
+     if (fail_stack.stack == NULL)					\
+       return -2;							\
+ 									\
+     fail_stack.size = INIT_FAILURE_ALLOC;				\
+     fail_stack.avail = 0;						\
+   } while (0)
+ 
+ 
+ /* Double the size of FAIL_STACK, unless its size already exceeds
+    `re_max_failures * MAX_FAILURE_ITEMS' elements.
+ 
+    Evaluates to 1 if it succeeds, and to 0 if either it ran out of memory
+    allocating space (FAIL_STACK's `stack' is then NULL) or the stack was
+    already too large.  REGEX_REALLOCATE requires `destination' be declared.  */
+ 
+ #define DOUBLE_FAIL_STACK(fail_stack)					\
+   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
+    ? 0									\
+    : ((fail_stack).stack = (fail_stack_elt_t *)				\
+         REGEX_REALLOCATE ((fail_stack).stack, 				\
+           (fail_stack).size * sizeof (fail_stack_elt_t),		\
+           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
+ 									\
+       (fail_stack).stack == NULL					\
+       ? 0								\
+       : ((fail_stack).size <<= 1, 					\
+          1)))
+ 
+ 
+ /* Push PATTERN_OP on FAIL_STACK, doubling the stack first if it is full.
+    The fullness test uses FAIL_STACK_FULL, i.e. the variable `fail_stack'.
+    Evaluates to 1 if the push was done, and to 0 if DOUBLE_FAIL_STACK
+    could not grow the stack.  */
+ #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
+   ((FAIL_STACK_FULL ()							\
+     && !DOUBLE_FAIL_STACK (fail_stack))					\
+     ? 0									\
+     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
+        1))
+ 
+ /* Push ITEM onto the failure stack, cast to `fail_stack_elt_t'.  Does no
+    bounds check; assumes the variable `fail_stack' already has room.
+    Probably should only be called from within `PUSH_FAILURE_POINT'.  */
+ #define PUSH_FAILURE_ITEM(item)						\
+   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+ 
+ /* The complement operation.  Assumes `fail_stack' is nonempty.  */
+ #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+ 
+ /* Push/pop the failure point id only when DEBUG is defined.  */
+ #ifdef DEBUG
+ #define DEBUG_PUSH PUSH_FAILURE_ITEM
+ #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+ #else
+ #define DEBUG_PUSH(item)
+ #define DEBUG_POP(item_addr)
+ #endif
+ 
+ 
+ /* Push the information about the state we will need
+    if we ever fail back to it.
+    Requires variables fail_stack, regstart, regend, reg_info, and
+    num_regs be declared.  The `destination' that DOUBLE_FAIL_STACK
+    requires is declared here.  PUSH_FAILURE_POINT only opens a `do {'
+    block and makes room on the stack; PUSH_FAILURE_POINT2 does the
+    pushes and closes the block, so the two must be used together.
+ 
+    Does `return FAILURE_CODE' if the stack cannot be grown.  */
+ #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
+   do {									\
+     char *destination;							\
+     /* This was int, so that when we don't save any registers, the	\
+        arithmetic of 0 + -1 isn't done as unsigned.  It is now	\
+        s_reg_t, since there is not a shred of a guarantee that int \
+        is wide enough to hold a value of something to which pointer can \
+        be assigned.  */						\
+     s_reg_t this_reg;							\
+     									\
+     DEBUG_STATEMENT (failure_id++);					\
+     DEBUG_STATEMENT (nfailure_points_pushed++);				\
+     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
+     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
+     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
+ 									\
+     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
+     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
+ 									\
+     /* Ensure we have enough space allocated for what we will push.  */	\
+     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
+       {									\
+ 	if (!DOUBLE_FAIL_STACK (fail_stack))			\
+ 	  return failure_code;						\
+ 									\
+ 	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
+ 		       (fail_stack).size);				\
+ 	DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+       }									
+ 
+ #define PUSH_FAILURE_POINT2(pattern_place, string_place, failure_code)	\
+     /* Push the info, starting with the registers.  */			\
+     DEBUG_PRINT1 ("\n");						\
+ 									\
+     PUSH_FAILURE_POINT_LOOP ();						\
+ 									\
+     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
+     PUSH_FAILURE_ITEM (lowest_active_reg);				\
+ 									\
+     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
+     PUSH_FAILURE_ITEM (highest_active_reg);				\
+ 									\
+     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
+     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
+     PUSH_FAILURE_ITEM (pattern_place);					\
+ 									\
+     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
+     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
+ 				 size2);				\
+     DEBUG_PRINT1 ("'\n");						\
+     PUSH_FAILURE_ITEM (string_place);					\
+ 									\
+     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
+     DEBUG_PUSH (failure_id);						\
+   } while (0)
+ 
+ /*  Pulled out of PUSH_FAILURE_POINT() to shorten the definition of that
+     macro (for VAX C).  Pushes start, end and info word of each active reg.  */
+ #define PUSH_FAILURE_POINT_LOOP()					\
+     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
+ 	 this_reg++)							\
+       {									\
+ 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
+ 	DEBUG_STATEMENT (num_regs_pushed++);				\
+ 									\
+ 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
+ 	PUSH_FAILURE_ITEM (regstart[this_reg]);				\
+ 									\
+ 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
+ 	PUSH_FAILURE_ITEM (regend[this_reg]);				\
+ 									\
+ 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
+ 	DEBUG_PRINT2 (" match_null=%d",					\
+ 		      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
+ 	DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
+ 	DEBUG_PRINT2 (" matched_something=%d",				\
+ 		      MATCHED_SOMETHING (reg_info[this_reg]));		\
+ 	DEBUG_PRINT2 (" ever_matched=%d",				\
+ 		      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
+ 	DEBUG_PRINT1 ("\n");						\
+ 	PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
+       }
+ 
+ /* This is the number of items that are pushed and popped on the stack
+    for each register: its regstart, its regend and its reg_info word.  */
+ #define NUM_REG_ITEMS  3
+ 
+ /* Individual items aside from the registers: the lowest and highest
+    active register numbers, the pattern place and the string place.  */
+ #ifdef DEBUG
+ #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
+ #else
+ #define NUM_NONREG_ITEMS 4
+ #endif
+ 
+ /* We push at most this many items on the stack.  Uses `num_regs'.  */
+ #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+ 
+ /* We actually push this many items.  Uses the active register bounds.  */
+ #define NUM_FAILURE_ITEMS						\
+   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
+     + NUM_NONREG_ITEMS)
+ 
+ /* How many items can still be added to the stack without overflowing it.  */
+ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+ 
+ 
+ /* Pops what PUSH_FAILURE_POINT and PUSH_FAILURE_POINT2 push.
+ 
+    We restore into the parameters, all of which should be lvalues:
+      STR -- the saved data position.
+      PAT -- the saved pattern position.
+      LOW_REG, HIGH_REG -- the lowest and highest active registers.
+      REGSTART, REGEND -- arrays of string positions.
+      REG_INFO -- array of information about each subexpression.
+    
+    Also assumes the variables `fail_stack' and (if debugging), `bufp',
+    `pend', `string1', `size1', `string2', and `size2'.  */
+ 
+ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+ {									\
+   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
+   s_reg_t this_reg;							\
+   const unsigned char *string_temp;					\
+ 									\
+   assert (!FAIL_STACK_EMPTY ());					\
+ 									\
+   /* Remove failure points and point to how many regs pushed.  */	\
+   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
+   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
+   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
+ 									\
+   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
+ 									\
+   DEBUG_POP (&failure_id);						\
+   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
+ 									\
+   /* If the saved string location is NULL, it came from an		\
+      on_failure_keep_string_jump opcode, and we want to throw away the	\
+      saved NULL, thus retaining our current position in the string.  */	\
+   string_temp = POP_FAILURE_ITEM ();					\
+   if (string_temp != NULL)						\
+     str = (const char *) string_temp;					\
+ 									\
+   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
+   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
+   DEBUG_PRINT1 ("'\n");							\
+ 									\
+   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
+   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
+   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
+ 									\
+   POP_FAILURE_POINT2 (low_reg, high_reg, regstart, regend, reg_info);
+ 
+ /*  Pulled out of POP_FAILURE_POINT() to shorten the definition of that
+     macro (for MSC 5.1).  Closes the `{' that POP_FAILURE_POINT opens.  */
+ #define POP_FAILURE_POINT2(low_reg, high_reg, regstart, regend, reg_info) \
+ 									\
+   /* Restore register info.  */						\
+   high_reg = (active_reg_t) POP_FAILURE_ITEM ();			\
+   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
+ 									\
+   low_reg = (active_reg_t) POP_FAILURE_ITEM ();				\
+   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
+ 									\
+   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
+     {									\
+       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
+ 									\
+       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
+       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
+ 									\
+       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
+       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
+ 									\
+       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
+       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
+     }									\
+ 									\
+   DEBUG_STATEMENT (nfailure_points_popped++);				\
+ } /* POP_FAILURE_POINT */
+ 
+ 
+ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+    characters can start a string that matches the pattern.  This fastmap
+    is used by re_search to skip quickly over impossible starting points.
+ 
+    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+    area as BUFP->fastmap.
+    
+    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+    the pattern buffer.
+ 
+    Returns 0 if we succeed, -2 if the failure stack can't be allocated.  */
+ 
+ int
+ re_compile_fastmap (bufp)
+      struct re_pattern_buffer *bufp;
+ {
+   int j, k;
+   fail_stack_type fail_stack;
+ #ifndef REGEX_MALLOC
+   char *destination;
+ #endif
+   /* We don't push any register information onto the failure stack.  */
+   unsigned num_regs = 0;
+   
+   register char *fastmap = bufp->fastmap;
+   unsigned char *pattern = bufp->buffer;
+   const unsigned char *p = pattern;
+   register unsigned char *pend = pattern + bufp->used;
+ 
+   /* Assume that each path through the pattern can be null until
+      proven otherwise.  We set this false at the bottom of switch
+      statement, to which we get only if a particular path doesn't
+      match the empty string.  */
+   boolean path_can_be_null = true;
+ 
+   /* We aren't doing a `succeed_n' to begin with.  */
+   boolean succeed_n_p = false;
+ 
+   assert (fastmap != NULL && p != NULL);
+   
+   INIT_FAIL_STACK ();
+   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
+   bufp->can_be_null = 0;
+       
+   while (p != pend || !FAIL_STACK_EMPTY ())
+     {
+       if (p == pend)
+         {
+           bufp->can_be_null |= path_can_be_null;
+           
+           /* Reset for next path.  */
+           path_can_be_null = true;
+           
+           p = fail_stack.stack[--fail_stack.avail];
+ 	}
+ 
+       /* We should never be about to go beyond the end of the pattern.  */
+       assert (p < pend);
+       
+ #ifdef SWITCH_ENUM_BUG
+       switch ((int) ((re_opcode_t) *p++))
+ #else
+       switch ((re_opcode_t) *p++)
+ #endif
+ 	{
+ 
+         /* I guess the idea here is to simply not bother with a fastmap
+            if a backreference is used, since it's too hard to figure out
+            the fastmap for the corresponding group.  Setting
+            `can_be_null' stops `re_search_2' from using the fastmap, so
+            that is all we do.  */
+ 	case duplicate:
+ 	  bufp->can_be_null = 1;
+           return 0;
+ 
+ 
+       /* Following are the cases which match a character.  These end
+          with `break'.  */
+ 
+ 	case exactn:
+           fastmap[p[1]] = 1;
+ 	  break;
+ 
+ 
+         case charset:
+ 	  /* NOTE: Charset for single-byte chars never contain
+ 		   multi-byte char.  See set_list_bits().  */
+           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+               fastmap[j] = 1;
+ 	  {
+ 	    unsigned short size;
+ 	    unsigned short c, end;	/* Wider than a byte, so that `c <= end' terminates when end is 0xff.  */
+ 
+ 	    p += p[-1] + 2;
+ 	    size = EXTRACT_UNSIGNED (&p[-2]);
+ 	    for (j = 0; j < size; j++)
+ 	      /* set bits for 1st bytes of multi-byte chars.  */
+ 	      for (c = (unsigned char) p[j*4],
+ 		   end = (unsigned char) p[j*4 + 2];
+ 		   c <= end; c++)
+ 		/* NOTE: Charset for multi-byte chars might contain
+ 		         single-byte chars.  We must reject them. */
+ 		if (ismbchar (c))
+ 		  fastmap[c] = 1;
+ 	  }
+ 	  break;
+ 
+ 
+ 	case charset_not:
+ 	  /* S: set of all single-byte chars.
+ 	     M: set of all first bytes that can start multi-byte chars.
+ 	     s: any set of single-byte chars.
+ 	     m: any set of first bytes that can start multi-byte chars.
+ 
+ 	     We assume S+M = U.
+ 	       ___      _   _
+ 	       s+m = (S*s+M*m).  */
+ 	  /* Chars beyond end of map must be allowed.  */
+ 	  /* NOTE: Charset_not for single-byte chars might contain
+ 		   multi-byte chars.  See set_list_bits(). */
+ 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ 	    if (!ismbchar (j))
+ 	      fastmap[j] = 1;
+ 
+ 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ 	      if (!ismbchar (j))
+ 		fastmap[j] = 1;
+ 	  {
+ 	    unsigned short size;
+ 	    unsigned short c, beg;
+ 
+ 	    p += p[-1] + 2;
+ 	    size = EXTRACT_UNSIGNED (&p[-2]);
+ 	    c = 0x00;
+ 	    for (j = 0; j < size; j++) {
+ 	      for (beg = (unsigned char) p[j*4 + 0]; c <= beg; c++)
+ 		if (ismbchar (c))
+ 		  fastmap[c] = 1;
+ 	      c = (unsigned char) p[j*4 + 2];
+ 	    }
+ 	    for (beg = 0xff; c <= beg; c++)
+ 	      if (ismbchar (c))
+ 		fastmap[c] = 1;
+ 	  }
+           break;
+ 
+ 
+ 	case wordchar:
+ 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+ 	    if (SYNTAX (j) == Sword)
+ 	      fastmap[j] = 1;
+ 	  break;
+ 
+ 
+ 	case notwordchar:
+ 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+ 	    if (SYNTAX (j) != Sword)
+ 	      fastmap[j] = 1;
+ 	  break;
+ 
+ 
+         case anychar:
+           /* `.' matches anything ...  */
+ 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+             fastmap[j] = 1;
+ 
+           /* ... except perhaps newline.  */
+           if (!(bufp->syntax & RE_DOT_NEWLINE))
+             fastmap['\n'] = 0;
+ 
+           /* Return if we have already set `can_be_null'; if we have,
+              then the fastmap is irrelevant.  Something's wrong here.  */
+ 	  else if (bufp->can_be_null)
+ 	    return 0;
+ 
+           /* Otherwise, have to check alternative paths.  */
+ 	  break;
+ 
+ 
+ #ifdef emacs
+         case syntaxspec:
+ 	  k = *p++;
+ 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+ 	    if (SYNTAX (j) == (enum syntaxcode) k)
+ 	      fastmap[j] = 1;
+ 	  break;
+ 
+ 
+ 	case notsyntaxspec:
+ 	  k = *p++;
+ 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+ 	    if (SYNTAX (j) != (enum syntaxcode) k)
+ 	      fastmap[j] = 1;
+ 	  break;
+ 
+ 
+       /* All cases after this match the empty string.  These end with
+          `continue'.  */
+ 
+ 
+ 	case before_dot:
+ 	case at_dot:
+ 	case after_dot:
+           continue;
+ #endif /* emacs */
+ 
+ 
+         case no_op:
+         case begline:
+         case endline:
+ 	case begbuf:
+ 	case endbuf:
+ 	case wordbound:
+ 	case notwordbound:
+ 	case wordbeg:
+ 	case wordend:
+         case push_dummy_failure:
+           continue;
+ 
+ 
+ 	case jump_n:
+         case pop_failure_jump:
+ 	case maybe_pop_jump:
+ 	case jump:
+         case jump_past_alt:
+ 	case dummy_failure_jump:
+           EXTRACT_NUMBER_AND_INCR (j, p);
+ 	  p += j;	
+ 	  if (j > 0)
+ 	    continue;
+             
+           /* Jump backward implies we just went through the body of a
+              loop and matched nothing.  Opcode jumped to should be
+              `on_failure_jump' or `succeed_n'.  Just treat it like an
+              ordinary jump.  For a * loop, it has pushed its failure
+              point already; if so, discard that as redundant.  */
+           if ((re_opcode_t) *p != on_failure_jump
+ 	      && (re_opcode_t) *p != succeed_n)
+ 	    continue;
+ 
+           p++;
+           EXTRACT_NUMBER_AND_INCR (j, p);
+           p += j;		
+ 	  
+           /* If what's on the stack is where we are now, pop it.  */
+           if (!FAIL_STACK_EMPTY () 
+ 	      && fail_stack.stack[fail_stack.avail - 1] == p)
+             fail_stack.avail--;
+ 
+           continue;
+ 
+ 
+         case on_failure_jump:
+         case on_failure_keep_string_jump:
+ 	handle_on_failure_jump:
+           EXTRACT_NUMBER_AND_INCR (j, p);
+ 
+           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+              end of the pattern.  We don't want to push such a point,
+              since when we restore it above, entering the switch will
+              increment `p' past the end of the pattern.  We don't need
+              to push such a point since we obviously won't find any more
+              fastmap entries beyond `pend'.  Such a pattern can match
+              the null string, though.  */
+           if (p + j < pend)
+             {
+               if (!PUSH_PATTERN_OP (p + j, fail_stack))
+                 return -2;
+             }
+           else
+             bufp->can_be_null = 1;
+ 
+           if (succeed_n_p)
+             {
+               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
+               succeed_n_p = false;
+ 	    }
+ 
+           continue;
+ 
+ 
+ 	case succeed_n:
+           /* Get to the number of times to succeed.  */
+           p += 2;		
+ 
+           /* Increment p past the n for when k != 0.  */
+           EXTRACT_NUMBER_AND_INCR (k, p);
+           if (k == 0)
+ 	    {
+               p -= 4;
+   	      succeed_n_p = true;  /* Spaghetti code alert.  */
+               goto handle_on_failure_jump;
+             }
+           continue;
+ 
+ 
+ 	case set_number_at:
+           p += 4;
+           continue;
+ 
+ 
+ 	case start_memory:
+         case stop_memory:
+ 	  p += 2;
+ 	  continue;
+ 
+ 
+ 	default:
+           abort (); /* We have listed all the cases.  */
+         } /* switch *p++ */
+ 
+       /* Getting here means we have found the possible starting
+          characters for one path of the pattern -- and that the empty
+          string does not match.  We need not follow this path further.
+          Instead, look at the next alternative (remembered on the
+          stack), or quit if no more.  The test at the top of the loop
+          does these things.  */
+       path_can_be_null = false;
+       p = pend;
+     } /* while p */
+ 
+   /* Set `can_be_null' for the last path (also the first path, if the
+      pattern is empty).  */
+   bufp->can_be_null |= path_can_be_null;
+   return 0;
+ } /* re_compile_fastmap */
+ 
+ /* Make REGS use caller-supplied storage: STARTS and ENDS become its
+    `start' and `end' arrays, and NUM_REGS its register count.  Later
+    matches using PATTERN_BUFFER and REGS record register information in
+    that memory.  STARTS and ENDS must come from the malloc library routine
+    and each be at least NUM_REGS * sizeof (regoff_t) bytes long.
+ 
+    If NUM_REGS == 0, subsequent matches should allocate their own
+    register data.
+ 
+    Unless this function is called, the first search or match using
+    PATTERN_BUFFER will allocate its own register data, without
+    freeing the old data.  */
+ 
+ void
+ re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+ {
+   if (num_regs == 0)
+     {
+       bufp->regs_allocated = REGS_UNALLOCATED;
+       regs->num_regs = 0;
+       regs->end = 0;
+       regs->start = 0;
+       return;
+     }
+ 
+   bufp->regs_allocated = REGS_REALLOCATE;
+   regs->num_regs = num_regs;
+   regs->start = starts;
+   regs->end = ends;
+ }
+ 
+ /* Searching routines.  */
+ 
+ /* Like re_search_2, below, but only one string is specified (it is
+    passed as STRING2, with a NULL, zero-length STRING1), and it doesn't
+    let you say where to stop matching: STOP is always SIZE.  */
+ int
+ re_search (bufp, string, size, startpos, range, regs)
+      struct re_pattern_buffer *bufp;
+      const char *string;
+      int size, startpos, range;
+      struct re_registers *regs;
+ {
+   return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 
+ 		      regs, size);
+ }
+ 
+ 
+ /* Using the compiled pattern in BUFP->buffer, first tries to match the
+    virtual concatenation of STRING1 and STRING2, starting first at index
+    STARTPOS, then at STARTPOS + 1, and so on.
+    
+    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+    
+    RANGE is how far to scan while trying to match.  RANGE = 0 means try
+    only at STARTPOS; in general, the last start tried is STARTPOS +
+    RANGE.  A negative RANGE searches backwards from STARTPOS.
+    
+    In REGS, return the indices of the virtual concatenation of STRING1
+    and STRING2 that matched the entire BUFP->buffer and its contained
+    subexpressions.
+    
+    Do not consider matching one past the index STOP in the virtual
+    concatenation of STRING1 and STRING2.
+ 
+    We return either the position in the strings at which the match was
+    found, -1 if no match, or -2 if error (such as failure
+    stack overflow).  */
+ 
+ int
+ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+      struct re_pattern_buffer *bufp;
+      const char *string1, *string2;
+      int size1, size2;
+      int startpos;
+      int range;
+      struct re_registers *regs;
+      int stop;
+ {
+   int val;
+   register char *fastmap = bufp->fastmap;
+   register char *translate = bufp->translate;
+   int total_size = size1 + size2;
+   int endpos = startpos + range;
+ 
+   /* Check for out-of-range STARTPOS.  */
+   if (startpos < 0 || startpos > total_size)
+     return -1;
+     
+   /* Fix up RANGE if it might eventually take us outside
+      the virtual concatenation of STRING1 and STRING2.  */
+   if (endpos < -1)
+     range = -1 - startpos;
+   else if (endpos > total_size)
+     range = total_size - startpos;
+ 
+   /* If the search isn't to be a backwards one, don't waste time in a
+      search for a pattern that must be anchored.  */
+   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+     {
+       if (startpos > 0)
+ 	return -1;
+       else
+ 	range = 1;
+     }
+ 
+   /* Update the fastmap now if not correct already.  */
+   if (fastmap && !bufp->fastmap_accurate)
+     if (re_compile_fastmap (bufp) == -2)
+       return -2;
+   
+   /* Loop through the string, looking for a place to start matching.  */
+   for (;;)
+     { 
+       /* If a fastmap is supplied, skip quickly over characters that
+          cannot be the start of a match.  If the pattern can match the
+          null string, however, we don't need to skip characters; we want
+          the first null string.  */
+       if (fastmap && startpos < total_size && !bufp->can_be_null)
+ 	{
+ 	  if (range > 0)	/* Searching forwards.  */
+ 	    {
+ 	      register const char *d;
+ 	      register int lim = 0;
+ 	      int irange = range;
+ 	      unsigned char c;
+ 
+               if (startpos < size1 && startpos + range >= size1)
+                 lim = range - (size1 - startpos);
+ 
+ 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+    
+               /* Written out as an if-else to avoid testing `translate'
+                  inside the loop.  */
+ 	      if (translate)
+ 		while (range > lim) {
+ 		  c = *d++;
+ 		  if (ismbchar (c)) {
+ 		    if (fastmap[c])
+ 		      break;
+ 		    d++;
+ 		    range -= 2;	/* NOTE(review): leaves range < lim (even < 0) when range - lim == 1 -- confirm.  */
+ 		    continue;
+ 		  }
+ 		  if (fastmap[(unsigned char) translate[c]])
+ 		    break;
+                   range--;
+ 		}
+ 	      else
+ 		while (range > lim && (c = *d++, !fastmap[c])) {
+ 		  if (ismbchar (c))
+ 		    d++, range--;
+                   range--;
+ 		}
+ 
+ 	      startpos += irange - range;
+ 	    }
+ 	  else				/* Searching backwards.  */
+ 	    {
+ 	      register char c = (size1 == 0 || startpos >= size1
+                                  ? string2[startpos - size1] 
+                                  : string1[startpos]);
+ 
+ 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
+ 		goto advance;
+ 	    }
+ 	}
+ 
+       /* If can't match the null string, and that's all we have left, fail.  */
+       if (range >= 0 && startpos == total_size && fastmap
+           && !bufp->can_be_null)
+ 	return -1;
+ 
+       val = re_match_2 (bufp, string1, size1, string2, size2,
+ 	                startpos, regs, stop);
+       if (val >= 0)
+ 	return startpos;
+         
+       if (val == -2)
+ 	return -2;
+ 
+     advance:
+       if (!range) 
+         break;
+       else if (range > 0) 
+         {
+ 	  const char *d = ((startpos >= size1 ? string2 - size1 : string1)
+ 			   + startpos);
+ 
+ 	  if (ismbchar (*d)) {
+ 	    range--, startpos++;
+ 	    if (!range)
+ 	      break;
+ 	  }
+           range--, startpos++;
+         }
+       else
+         {
+           range++, startpos--;
+ 	  {
+ 	    const char *s, *d, *p;
+ 
+ 	    if (startpos < size1)
+ 	      s = string1, d = string1 + startpos;
+ 	    else
+ 	      s = string2, d = string2 + startpos - size1;
+ 	    for (p = d; p-- > s && ismbchar(*p); )
+ 	      /* `--p >= s' might not work on 80[12]?86.  (When not using
+ 		 the huge model and the offset of s is 0.)  */
+ 	      ;
+ 	    if (!((d - p) & 1)) {	/* Odd run of ismbchar bytes before d; presumably d is a 2nd byte -- confirm.  */
+ 	      if (!range)
+ 		break;
+ 	      range++, startpos--;
+ 	    }
+ 	  }
+         }
+     }
+   return -1;
+ } /* re_search_2 */
+ 
+ /* Structure for per-register (a.k.a. per-group) information.
+    This must not be longer than one word, because we push this value
+    onto the failure stack.  Other register information, such as the
+    starting and ending positions (which are addresses), and the list of
+    inner groups (which is a bits list) are maintained in separate
+    variables.  
+    
+    We are making a (strictly speaking) nonportable assumption here: that
+    the compiler will pack our bit fields into something that fits into
+    the type of `word', i.e., is something that fits into one item on the
+    failure stack.  */
+ 
+ /* Declarations and macros for re_match_2.  */
+ 
+ typedef union
+ {
+   fail_stack_elt_t word;	/* The whole record, as pushed on the failure stack.  */
+   struct
+   {
+       /* This field is one if this group can match the empty string,
+          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
+ #define MATCH_NULL_UNSET_VALUE 3
+     unsigned match_null_string_p : 2;
+     unsigned is_active : 1;
+     unsigned matched_something : 1;	/* Set by SET_REGS_MATCHED.  */
+     unsigned ever_matched_something : 1;	/* Set by SET_REGS_MATCHED.  */
+   } bits;
+ } register_info_type;
+ 
+ #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p) /* R: a register_info_type.  */
+ #define IS_ACTIVE(R)  ((R).bits.is_active)
+ #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
+ #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
+ 
+ static boolean group_match_null_string_p _RE_ARGS((unsigned char **p,
+ 						   unsigned char *end,
+ 					    register_info_type *reg_info));
+ static boolean alt_match_null_string_p _RE_ARGS((unsigned char *p,
+ 						 unsigned char *end,
+ 					  register_info_type *reg_info));
+ static boolean common_op_match_null_string_p _RE_ARGS((unsigned char **p,
+ 						       unsigned char *end,
+ 						register_info_type *reg_info));
+ static int bcmp_translate _RE_ARGS((const char *s1, const char *s2,
+ 				    int len, char *translate));
+ 
+ /* Call this after a real character has been matched.  For every
+    register from lowest_active_reg through highest_active_reg it sets
+    both the `matched_something' and `ever_matched_something' flags.  */
+ #define SET_REGS_MATCHED()						\
+   do									\
+     {									\
+       active_reg_t reg_no = lowest_active_reg;				\
+       while (reg_no <= highest_active_reg)				\
+         {								\
+           EVER_MATCHED_SOMETHING (reg_info[reg_no]) = 1;		\
+           MATCHED_SOMETHING (reg_info[reg_no]) = 1;			\
+           reg_no++;							\
+         }								\
+     }									\
+   while (0)
+ 
+ 
+ /* This converts PTR, a pointer into one of the search strings `string1'
+    and `string2', into an offset within their virtual concatenation.  */
+ #define POINTER_TO_OFFSET(ptr)						\
+   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+ 
+ /* Registers are set to a sentinel when they haven't yet matched.  */
+ #define REG_UNSET_VALUE ((char *) -1)
+ #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+ 
+ 
+ /* Macros for dealing with the split strings in re_match_2.  */
+ 
+ #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
+ 
+ /* Call before fetching a character with *d.  This switches over to
+    string2 if necessary, or does `goto fail' at the end of string2.  */
+ #define PREFETCH()							\
+   while (d == dend)						    	\
+     {									\
+       /* End of string2 => fail.  */					\
+       if (dend == end_match_2) 						\
+         goto fail;							\
+       /* End of string1 => advance to string2.  */ 			\
+       d = string2;						        \
+       dend = end_match_2;						\
+     }
+ 
+ /* Test if at very beginning or at very end of the virtual concatenation
+    of `string1' and `string2'.  If only one string, it's `string2'.
+    AT_STRINGS_BEG is also true whenever `size2' is zero.  */
+ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+ #define AT_STRINGS_END(d) ((d) == end2)	
+ 
+ 
+ /* Test if D points to a character which is word-constituent.  We have
+    two special cases to check for: if past the end of string1, look at
+    the first character in string2; and if before the beginning of
+    string2, look at the last character in string1.  */
+ #define WORDCHAR_P(d)							\
+   (SYNTAX ((d) == end1 ? *string2					\
+            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
+    == Sword)
+ 
+ /* Test if the character before D and the one at D differ with respect
+    to being word-constituent.  Also true at either end of the strings.  */
+ #define AT_WORD_BOUNDARY(d)						\
+   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
+    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+ 
+ 
+ /* Free everything we malloc.  FREE_VAR is one statement, so it is safe in an unbraced `if'.  */
+ #ifdef REGEX_MALLOC
+ #define FREE_VAR(var) do { if (var) free (var); var = NULL; } while (0)
+ #define FREE_VARIABLES()						\
+   do {									\
+     FREE_VAR (fail_stack.stack);					\
+     FREE_VAR (regstart);						\
+     FREE_VAR (regend);							\
+     FREE_VAR (old_regstart);						\
+     FREE_VAR (old_regend);						\
+     FREE_VAR (best_regstart);						\
+     FREE_VAR (best_regend);						\
+     FREE_VAR (reg_info);						\
+     FREE_VAR (reg_dummy);						\
+     FREE_VAR (reg_info_dummy);						\
+   } while (0)
+ #else /* not REGEX_MALLOC */
+ /* Some MIPS systems (at least) want this to free alloca'd storage.  */
+ #define FREE_VARIABLES() alloca (0)
+ #endif /* not REGEX_MALLOC */
+ 
+ 
+ /* Sentinels meaning "no register is active".  They must meet several
+    constraints.  They must not be valid register values; since we have
+    a limit of 255 registers (one byte in the pattern holds the register
+    number), we use 1 << BYTEWIDTH and the value after it.  They must
+    differ by 1, because of NUM_FAILURE_ITEMS above.  And the lowest
+    sentinel must be larger than the highest, so we do not try to
+    actually save any registers when none are active.  */
+ #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+ 
+ /* Matching routines.  */
+ 
+ #ifndef emacs   /* Emacs never uses this.  */
+ /* re_match is like re_match_2 except it takes only a single string: it
+    passes a null STRING1 of size 0, STRING as STRING2, and SIZE as STOP.  */
+ int
+ re_match (bufp, string, size, pos, regs)
+      struct re_pattern_buffer *bufp;
+      const char *string;
+      int size, pos;
+      struct re_registers *regs;
+  {
+   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); 
+ }
+ #endif /* not emacs */
+ 
+ 
+ /* re_match_2 matches the compiled pattern in BUFP against the
+    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+    and SIZE2, respectively).  We start matching at POS, and stop
+    matching at STOP.
+    
+    If REGS is non-null and the `no_sub' field of BUFP is zero, we
+    store offsets for the substring each group matched in REGS.  See the
+    documentation for exactly how many groups we fill.
+ 
+    We return -1 if no match, -2 if an internal error (such as the
+    failure stack overflowing).  Otherwise, we return the length of the
+    matched substring.  */
+ 
+ int
+ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+      struct re_pattern_buffer *bufp;
+      const char *string1, *string2;
+      int size1, size2;
+      int pos;
+      struct re_registers *regs;
+      int stop;
+ {
+   /* General temporaries.  */
+   int mcnt;
+   unsigned char *p1;
+ 
+   /* Just past the end of the corresponding string.  */
+   const char *end1, *end2;
+ 
+   /* Pointers into string1 and string2, just past the last characters in
+      each to consider matching.  */
+   const char *end_match_1, *end_match_2;
+ 
+   /* Where we are in the data, and the end of the current string.  */
+   const char *d, *dend;
+   
+   /* Where we are in the pattern, and the end of the pattern.  */
+   unsigned char *p = bufp->buffer;
+   register unsigned char *pend = p + bufp->used;
+ 
+   /* We use this to map every character in the string.  */
+   char *translate = bufp->translate;
+ 
+   /* Failure point stack.  Each place that can handle a failure further
+      down the line pushes a failure point on this stack.  It consists of
+      restart, regend, and reg_info for all registers corresponding to
+      the subexpressions we're currently inside, plus the number of such
+      registers, and, finally, two char *'s.  The first char * is where
+      to resume scanning the pattern; the second one is where to resume
+      scanning the strings.  If the latter is zero, the failure point is
+      a ``dummy''; if a failure happens and the failure point is a dummy,
+      it gets discarded and the next one is tried.  */
+   fail_stack_type fail_stack;
+ #ifdef DEBUG
+   static unsigned failure_id = 0;
+   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+ #endif
+ 
+   /* We fill all the registers internally, independent of what we
+      return, for use in backreferences.  The number here includes
+      an element for register zero.  */
+   size_t num_regs = bufp->re_nsub + 1;
+   
+   /* The currently active registers.  */
+   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ 
+   /* Information on the contents of registers. These are pointers into
+      the input strings; they record just what was matched (on this
+      attempt) by a subexpression part of the pattern, that is, the
+      regnum-th regstart pointer points to where in the pattern we began
+      matching and the regnum-th regend points to right after where we
+      stopped matching the regnum-th subexpression.  (The zeroth register
+      keeps track of what the whole pattern matches.)  */
+   const char **regstart = 0, **regend = 0;
+ 
+   /* If a group that's operated upon by a repetition operator fails to
+      match anything, then the register for its start will need to be
+      restored because it will have been set to wherever in the string we
+      are when we last see its open-group operator.  Similarly for a
+      register's end.  */
+   const char **old_regstart = 0, **old_regend = 0;
+ 
+   /* The is_active field of reg_info helps us keep track of which (possibly
+      nested) subexpressions we are currently in. The matched_something
+      field of reg_info[reg_num] helps us tell whether or not we have
+      matched any of the pattern so far this time through the reg_num-th
+      subexpression.  These two fields get reset each time through any
+      loop their register is in.  */
+   register_info_type *reg_info = 0; 
+ 
+   /* The following record the register info as found in the above
+      variables when we find a match better than any we've seen before. 
+      This happens as we backtrack through the failure points, which in
+      turn happens only if we have not yet matched the entire string. */
+   unsigned best_regs_set = false;
+   const char **best_regstart = 0, **best_regend = 0;
+   
+   /* Logically, this is `best_regend[0]'.  But we don't want to have to
+      allocate space for that if we're not allocating space for anything
+      else (see below).  Also, we never need info about register 0 for
+      any of the other register vectors, and it seems rather a kludge to
+      treat `best_regend' differently than the rest.  So we keep track of
+      the end of the best match so far in a separate variable.  We
+      initialize this to NULL so that when we backtrack the first time
+      and need to test it, it's not garbage.  */
+   const char *match_end = NULL;
+ 
+   /* Used when we pop values we don't care about.  */
+   const char **reg_dummy = 0;
+   register_info_type *reg_info_dummy = 0;
+ 
+ #ifdef DEBUG
+   /* Counts the total number of registers pushed.  */
+   unsigned num_regs_pushed = 0; 	
+ #endif
+ 
+   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+   
+   INIT_FAIL_STACK ();
+   
+   /* Do not bother to initialize all the register variables if there are
+      no groups in the pattern, as it takes a fair amount of time.  If
+      there are groups, we include space for register 0 (the whole
+      pattern), even though we never use it, since it simplifies the
+      array indexing.  We should fix this.  */
+   if (bufp->re_nsub)
+     {
+       regstart = REGEX_TALLOC (num_regs, const char *);
+       regend = REGEX_TALLOC (num_regs, const char *);
+       old_regstart = REGEX_TALLOC (num_regs, const char *);
+       old_regend = REGEX_TALLOC (num_regs, const char *);
+       best_regstart = REGEX_TALLOC (num_regs, const char *);
+       best_regend = REGEX_TALLOC (num_regs, const char *);
+       reg_info = REGEX_TALLOC (num_regs, register_info_type);
+       reg_dummy = REGEX_TALLOC (num_regs, const char *);
+       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+ 
+       if (!(regstart && regend && old_regstart && old_regend && reg_info 
+             && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 
+         {
+           FREE_VARIABLES ();
+           return -2;
+         }
+     }
+ #ifdef REGEX_MALLOC
+   else
+     {
+       /* We must initialize all our variables to NULL, so that
+          `FREE_VARIABLES' doesn't try to free them.  */
+       regstart = regend = old_regstart = old_regend = best_regstart
+         = best_regend = reg_dummy = NULL;
+       reg_info = reg_info_dummy = (register_info_type *) NULL;
+     }
+ #endif /* REGEX_MALLOC */
+ 
+   /* The starting position is bogus.  */
+   if (pos < 0 || pos > size1 + size2)
+     {
+       FREE_VARIABLES ();
+       return -1;
+     }
+     
+   /* Initialize subexpression text positions to -1 to mark ones that no
+      start_memory/stop_memory has been seen for. Also initialize the
+      register information struct.  */
+   for (mcnt = 1; mcnt < num_regs; mcnt++)
+     {
+       regstart[mcnt] = regend[mcnt] 
+         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+         
+       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+       IS_ACTIVE (reg_info[mcnt]) = 0;
+       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+     }
+   
+   /* We move `string1' into `string2' if the latter's empty -- but not if
+      `string1' is null.  */
+   if (size2 == 0 && string1 != NULL)
+     {
+       string2 = string1;
+       size2 = size1;
+       string1 = 0;
+       size1 = 0;
+     }
+   end1 = string1 + size1;
+   end2 = string2 + size2;
+ 
+   /* Compute where to stop matching, within the two strings.  */
+   if (stop <= size1)
+     {
+       end_match_1 = string1 + stop;
+       end_match_2 = string2;
+     }
+   else
+     {
+       end_match_1 = end1;
+       end_match_2 = string2 + stop - size1;
+     }
+ 
+   /* `p' scans through the pattern as `d' scans through the data. 
+      `dend' is the end of the input string that `d' points within.  `d'
+      is advanced into the following input string whenever necessary, but
+      this happens before fetching; therefore, at the beginning of the
+      loop, `d' can be pointing at the end of a string, but it cannot
+      equal `string2'.  */
+   if (size1 > 0 && pos <= size1)
+     {
+       d = string1 + pos;
+       dend = end_match_1;
+     }
+   else
+     {
+       d = string2 + pos - size1;
+       dend = end_match_2;
+     }
+ 
+   DEBUG_PRINT1 ("The compiled pattern is: ");
+   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+   DEBUG_PRINT1 ("The string to match is: `");
+   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+   DEBUG_PRINT1 ("'\n");
+   
+   /* This loops over pattern commands.  It exits by returning from the
+      function if the match is complete, or it drops through if the match
+      fails at this starting point in the input data.  */
+   for (;;)
+     {
+       DEBUG_PRINT2 ("\n0x%x: ", p);
+ 
+       if (p == pend)
+ 	{ /* End of pattern means we might have succeeded.  */
+           DEBUG_PRINT1 ("end of pattern ... ");
+           
+ 	  /* If we haven't matched the entire string, and we want the
+              longest match, try backtracking.  */
+           if (d != end_match_2)
+ 	    {
+               DEBUG_PRINT1 ("backtracking.\n");
+               
+               if (!FAIL_STACK_EMPTY ())
+                 { /* More failure points to try.  */
+                   boolean same_str_p = (FIRST_STRING_P (match_end) 
+ 	        	                == MATCHING_IN_FIRST_STRING);
+ 
+                   /* If exceeds best match so far, save it.  */
+                   if (!best_regs_set
+                       || (same_str_p && d > match_end)
+                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+                     {
+                       best_regs_set = true;
+                       match_end = d;
+                       
+                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+                       
+                       for (mcnt = 1; mcnt < num_regs; mcnt++)
+                         {
+                           best_regstart[mcnt] = regstart[mcnt];
+                           best_regend[mcnt] = regend[mcnt];
+                         }
+                     }
+                   goto fail;	       
+                 }
+ 
+               /* If no failure points, don't restore garbage.  */
+               else if (best_regs_set)   
+                 {
+   	        restore_best_regs:
+                   /* Restore best match.  It may happen that `dend ==
+                      end_match_1' while the restored d is in string2.
+                      For example, the pattern `x.*y.*z' against the
+                      strings `x-' and `y-z-', if the two strings are
+                      not consecutive in memory.  */
+                   DEBUG_PRINT1 ("Restoring best registers.\n");
+                   
+                   d = match_end;
+                   dend = ((d >= string1 && d <= end1)
+ 		           ? end_match_1 : end_match_2);
+ 
+ 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
+ 		    {
+ 		      regstart[mcnt] = best_regstart[mcnt];
+ 		      regend[mcnt] = best_regend[mcnt];
+ 		    }
+                 }
+             } /* d != end_match_2 */
+ 
+           DEBUG_PRINT1 ("Accepting match.\n");
+ 
+           /* If caller wants register contents data back, do it.  */
+           if (regs && !bufp->no_sub)
+ 	    {
+               /* Have the register data arrays been allocated?  */
+               if (bufp->regs_allocated == REGS_UNALLOCATED)
+                 { /* No.  So allocate them with malloc.  We need one
+                      extra element beyond `num_regs' for the `-1' marker
+                      GNU code uses.  */
+                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+                   regs->start = TALLOC (regs->num_regs, regoff_t);
+                   regs->end = TALLOC (regs->num_regs, regoff_t);
+                   if (regs->start == NULL || regs->end == NULL)
+                     return -2;
+                   bufp->regs_allocated = REGS_REALLOCATE;
+                 }
+               else if (bufp->regs_allocated == REGS_REALLOCATE)
+                 { /* Yes.  If we need more elements than were already
+                      allocated, reallocate them.  If we need fewer, just
+                      leave it alone.  */
+                   if (regs->num_regs < num_regs + 1)
+                     {
+                       regs->num_regs = num_regs + 1;
+                       RETALLOC (regs->start, regs->num_regs, regoff_t);
+                       RETALLOC (regs->end, regs->num_regs, regoff_t);
+                       if (regs->start == NULL || regs->end == NULL)
+                         return -2;
+                     }
+                 }
+               else
+ 		{
+ 		  /* These braces fend off a "empty body in an else-statement"
+ 		     warning under GCC when assert expands to nothing.  */
+ 		  assert (bufp->regs_allocated == REGS_FIXED);
+ 		}
+ 
+               /* Convert the pointer data in `regstart' and `regend' to
+                  indices.  Register zero has to be set differently,
+                  since we haven't kept track of any info for it.  */
+               if (regs->num_regs > 0)
+                 {
+                   regs->start[0] = pos;
+                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ 			          : d - string2 + size1);
+                 }
+               
+               /* Go through the first `min (num_regs, regs->num_regs)'
+                  registers, since that is all we initialized.  */
+ 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ 		{
+                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+                     regs->start[mcnt] = regs->end[mcnt] = -1;
+                   else
+                     {
+ 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+                     }
+ 		}
+               
+               /* If the regs structure we return has more elements than
+                  were in the pattern, set the extra elements to -1.  If
+                  we (re)allocated the registers, this is the case,
+                  because we always allocate enough to have at least one
+                  -1 at the end.  */
+               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+                 regs->start[mcnt] = regs->end[mcnt] = -1;
+ 	    } /* regs && !bufp->no_sub */
+ 
+           FREE_VARIABLES ();
+           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+                         nfailure_points_pushed, nfailure_points_popped,
+                         nfailure_points_pushed - nfailure_points_popped);
+           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+ 
+           mcnt = d - pos - (MATCHING_IN_FIRST_STRING 
+ 			    ? string1 
+ 			    : string2 - size1);
+ 
+           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+ 
+           return mcnt;
+         }
+ 
+       /* Otherwise match next pattern command.  */
+ #ifdef SWITCH_ENUM_BUG
+       switch ((int) ((re_opcode_t) *p++))
+ #else
+       switch ((re_opcode_t) *p++)
+ #endif
+ 	{
+         /* Ignore these.  Used to ignore the n of succeed_n's which
+            currently have n == 0.  */
+         case no_op:
+           DEBUG_PRINT1 ("EXECUTING no_op.\n");
+           break;
+ 
+ 
+         /* Match the next n pattern characters exactly.  The following
+            byte in the pattern defines n, and the n bytes after that
+            are the characters to match.  */
+ 	case exactn:
+ 	  mcnt = *p++;
+           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+ 
+           /* This is written out as an if-else so we don't waste time
+              testing `translate' inside the loop.  */
+           if (translate)
+ 	    {
+ 	      do
+ 		{
+ 		  unsigned char c;
+ 
+ 		  PREFETCH ();
+ 		  c = *d++;
+ 		  if (ismbchar (c)) {
+ 		    if (c != (unsigned char) *p++
+ 			|| !--mcnt	/* パターンが正しくコンパイルさ
+ 					   れている限り, このチェックは
+ 					   冗長だが念のため.  */
+ 			|| d == dend
+ 			|| (unsigned char) *d++ != (unsigned char) *p++)
+ 		      goto fail;
+ 		    continue;
+ 		  }
+ 		  if ((unsigned char) translate[c] != *p++)
+                     goto fail;
+ 		}
+ 	      while (--mcnt);
+ 	    }
+ 	  else
+ 	    {
+ 	      do
+ 		{
+ #if 0
+ 		  /* 他の部分では, string1 と string2 にマルチバイト文字
+ 		     が跨るのを許していない.  このことを速度を犠牲にして
+ 		     もチェックする場合は, ここと次の `#if 0' を `#if 1' 
+ 		     に変えること.  */
+ 		  unsigned char c;
+ 
+ #endif
+ 		  PREFETCH ();
+ #if 0
+ 		  c = *d++;
+ 		  if (ismbchar (c)) {
+ 		    if (c != (unsigned char) *p++
+ 			|| !--mcnt
+ 			|| d == dend)
+ 		      goto fail;
+ 		    c = *d++;
+ 		  }
+ 		  if (c != (unsigned char) *p++) goto fail;
+ #else
+ 		  if (*d++ != (char) *p++) goto fail;
+ #endif
+ 		}
+ 	      while (--mcnt);
+ 	    }
+ 	  SET_REGS_MATCHED ();
+           break;
+ 
+ 
+         /* Match any character except possibly a newline or a null.  */
+ 	case anychar:
+           DEBUG_PRINT1 ("EXECUTING anychar.\n");
+ 
+           PREFETCH ();
+ 	  if (ismbchar (*d)) {
+ 	    if (d + 1 == dend || d[1] == '\n' || d[1] == '\0')
+ 	      /* 無効なマルチバイト文字にはマッチさせない.  ここでは, 簡
+ 		 単のため２バイト目が '\n', '\0' のものだけを無効とする.  */
+ 	      goto fail;
+ 	    SET_REGS_MATCHED ();
+ 	    DEBUG_PRINT2 ("  Matched `%d'.\n", EXTRACT_MBC (&d[0]));
+ 	    d += 2;
+ 	    break;
+ 	  }
+ 
+           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ 	    goto fail;
+ 
+           SET_REGS_MATCHED ();
+           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+           d++;
+ 	  break;
+ 
+ 
+ 	case charset:
+ 	case charset_not:
+ 	  {
+ 	    register unsigned short c;
+ 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
+ 
+             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+ 
+ 	    PREFETCH ();
+ 	    c = (unsigned char) *d;
+ 	    if (ismbchar (c)) {
+ 	      c <<= 8;
+ 	      if (d + 1 != dend)
+ 		c |= (unsigned char) d[1];
+ 	    }
+ 	    else
+ 	      c = TRANSLATE (c); /* The character to match.  */
+ 
+ 	    not = is_in_list (c, p);
+ 
+ 	    p += 1 + *p + 2 + EXTRACT_UNSIGNED (&p[1 + *p])*4;
+ 
+ 	    if (!not) goto fail;
+             
+ 	    SET_REGS_MATCHED ();
+             d++;
+ 	    if (d != dend && c >= 1 << BYTEWIDTH)
+ 	      d++;
+ 	    break;
+ 	  }
+ 
+ 
+         /* The beginning of a group is represented by start_memory.
+            The arguments are the register number in the next byte, and the
+            number of groups inner to this one in the next.  The text
+            matched within the group is recorded (in the internal
+            registers data structure) under the register number.  */
+         case start_memory:
+ 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+ 
+           /* Find out if this group can match the empty string.  */
+ 	  p1 = p;		/* To send to group_match_null_string_p.  */
+           
+           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+             REG_MATCH_NULL_STRING_P (reg_info[*p]) 
+               = group_match_null_string_p (&p1, pend, reg_info);
+ 
+           /* Save the position in the string where we were the last time
+              we were at this open-group operator in case the group is
+              operated upon by a repetition operator, e.g., with `(a*)*b'
+              against `ab'; then we want to ignore where we are now in
+              the string in case this attempt to match fails.  */
+           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+                              : regstart[*p];
+ 	  DEBUG_PRINT2 ("  old_regstart: %d\n", 
+ 			 POINTER_TO_OFFSET (old_regstart[*p]));
+ 
+           regstart[*p] = d;
+ 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+ 
+           IS_ACTIVE (reg_info[*p]) = 1;
+           MATCHED_SOMETHING (reg_info[*p]) = 0;
+           
+           /* This is the new highest active register.  */
+           highest_active_reg = *p;
+           
+           /* If nothing was active before, this is the new lowest active
+              register.  */
+           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+             lowest_active_reg = *p;
+ 
+           /* Move past the register number and inner group count.  */
+           p += 2;
+           break;
+ 
+ 
+         /* The stop_memory opcode represents the end of a group.  Its
+            arguments are the same as start_memory's: the register
+            number, and the number of inner groups.  */
+ 	case stop_memory:
+ 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+              
+           /* We need to save the string position the last time we were at
+              this close-group operator in case the group is operated
+              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+              against `aba'; then we want to ignore where we are now in
+              the string in case this attempt to match fails.  */
+           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ 			   : regend[*p];
+ 	  DEBUG_PRINT2 ("      old_regend: %d\n", 
+ 			 POINTER_TO_OFFSET (old_regend[*p]));
+ 
+           regend[*p] = d;
+ 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+ 
+           /* This register isn't active anymore.  */
+           IS_ACTIVE (reg_info[*p]) = 0;
+           
+           /* If this was the only register active, nothing is active
+              anymore.  */
+           if (lowest_active_reg == highest_active_reg)
+             {
+               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+             }
+           else
+             { /* We must scan for the new highest active register, since
+                  it isn't necessarily one less than now: consider
+                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
+                  new highest active register is 1.  */
+               unsigned char r = *p - 1;
+               while (r > 0 && !IS_ACTIVE (reg_info[r]))
+                 r--;
+               
+               /* If we end up at register zero, that means that we saved
+                  the registers as the result of an `on_failure_jump', not
+                  a `start_memory', and we jumped to past the innermost
+                  `stop_memory'.  For example, in ((.)*) we save
+                  registers 1 and 2 as a result of the *, but when we pop
+                  back to the second ), we are at the stop_memory 1.
+                  Thus, nothing is active.  */
+ 	      if (r == 0)
+                 {
+                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+                 }
+               else
+                 highest_active_reg = r;
+             }
+           
+           /* If just failed to match something this time around with a
+              group that's operated on by a repetition operator, try to
+              force exit from the ``loop'', and restore the register
+              information for this group that we had before trying this
+              last match.  */
+           if ((!MATCHED_SOMETHING (reg_info[*p])
+                || (re_opcode_t) p[-3] == start_memory)
+ 	      && (p + 2) < pend)              
+             {
+               boolean is_a_jump_n = false;
+               
+               p1 = p + 2;
+               mcnt = 0;
+               switch ((re_opcode_t) *p1++)
+                 {
+                   case jump_n:
+ 		    is_a_jump_n = true;
+                   case pop_failure_jump:
+ 		  case maybe_pop_jump:
+ 		  case jump:
+ 		  case dummy_failure_jump:
+                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ 		    if (is_a_jump_n)
+ 		      p1 += 2;
+                     break;
+                   
+                   default:
+                     /* do nothing */ ;
+                 }
+ 	      p1 += mcnt;
+         
+               /* If the next operation is a jump backwards in the pattern
+ 	         to an on_failure_jump right before the start_memory
+                  corresponding to this stop_memory, exit from the loop
+                  by forcing a failure after pushing on the stack the
+                  on_failure_jump's jump in the pattern, and d.  */
+               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ 		{
+                   /* If this group ever matched anything, then restore
+                      what its registers were before trying this last
+                      failed match, e.g., with `(a*)*b' against `ab' for
+                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
+                      against `aba' for regend[3].
+                      
+                      Also restore the registers for inner groups for,
+                      e.g., `((a*)(b*))*' against `aba' (register 3 would
+                      otherwise get trashed).  */
+                      
+                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ 		    {
+ 		      unsigned r; 
+         
+                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+                       
+ 		      /* Restore this and inner groups' (if any) registers.  */
+                       for (r = *p; r < *p + *(p + 1); r++)
+                         {
+                           regstart[r] = old_regstart[r];
+ 
+                           /* xx why this test?  */
+ 			  if (old_regend[r] >= regstart[r])
+                             regend[r] = old_regend[r];
+                         }     
+                     }
+ 		  p1++;
+                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+                   PUSH_FAILURE_POINT2(p1 + mcnt, d, -2);
+ 
+                   goto fail;
+                 }
+             }
+           
+           /* Move past the register number and the inner group count.  */
+           p += 2;
+           break;
+ 
+ 
+ 	/* \<digit> has been turned into a `duplicate' command which is
+            followed by the numeric value of <digit> as the register number.  */
+         case duplicate:
+ 	  {
+ 	    register const char *d2, *dend2;
+ 	    int regno = *p++;   /* Get which register to match against.  */
+ 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+ 
+ 	    /* Can't back reference a group which we've never matched.  */
+             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+               goto fail;
+               
+             /* Where in input to try to start matching.  */
+             d2 = regstart[regno];
+             
+             /* Where to stop matching; if both the place to start and
+                the place to stop matching are in the same string, then
+                set to the place to stop, otherwise, for now have to use
+                the end of the first string.  */
+ 
+             dend2 = ((FIRST_STRING_P (regstart[regno]) 
+ 		      == FIRST_STRING_P (regend[regno]))
+ 		     ? regend[regno] : end_match_1);
+ 	    for (;;)
+ 	      {
+ 		/* If necessary, advance to next segment in register
+                    contents.  */
+ 		while (d2 == dend2)
+ 		  {
+ 		    if (dend2 == end_match_2) break;
+ 		    if (dend2 == regend[regno]) break;
+ 
+                     /* End of string1 => advance to string2. */
+                     d2 = string2;
+                     dend2 = regend[regno];
+ 		  }
+ 		/* At end of register contents => success */
+ 		if (d2 == dend2) break;
+ 
+ 		/* If necessary, advance to next segment in data.  */
+ 		PREFETCH ();
+ 
+ 		/* How many characters left in this segment to match.  */
+ 		mcnt = dend - d;
+                 
+ 		/* Want how many consecutive characters we can match in
+                    one shot, so, if necessary, adjust the count.  */
+                 if (mcnt > dend2 - d2)
+ 		  mcnt = dend2 - d2;
+                   
+ 		/* Compare that many; failure if mismatch, else move
+                    past them.  */
+ 		if (translate 
+                     ? bcmp_translate (d, d2, mcnt, translate) 
+                     : bcmp (d, d2, mcnt))
+ 		  goto fail;
+ 		d += mcnt, d2 += mcnt;
+ 	      }
+ 	  }
+ 	  break;
+ 
+ 
+         /* begline matches the empty string at the beginning of the string
+            (unless `not_bol' is set in `bufp'), and, if
+            `newline_anchor' is set, after newlines.  */
+ 	case begline:
+           DEBUG_PRINT1 ("EXECUTING begline.\n");
+           
+           if (AT_STRINGS_BEG (d))
+             {
+               if (!bufp->not_bol) break;
+             }
+           else if (d[-1] == '\n' && bufp->newline_anchor)
+             {
+               break;
+             }
+           /* In all other cases, we fail.  */
+           goto fail;
+ 
+ 
+         /* endline is the dual of begline.  */
+ 	case endline:
+           DEBUG_PRINT1 ("EXECUTING endline.\n");
+ 
+           if (AT_STRINGS_END (d))
+             {
+               if (!bufp->not_eol) break;
+             }
+           
+           /* We have to ``prefetch'' the next character.  */
+           else if ((d == end1 ? *string2 : *d) == '\n'
+                    && bufp->newline_anchor)
+             {
+               break;
+             }
+           goto fail;
+ 
+ 
+ 	/* Match at the very beginning of the data.  */
+         case begbuf:
+           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+           if (AT_STRINGS_BEG (d))
+             break;
+           goto fail;
+ 
+ 
+ 	/* Match at the very end of the data.  */
+         case endbuf:
+           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ 	  if (AT_STRINGS_END (d))
+ 	    break;
+           goto fail;
+ 
+ 
+         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
+            pushes NULL as the value for the string on the stack.  Then
+            `pop_failure_point' will keep the current value for the
+            string, instead of restoring it.  To see why, consider
+            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
+            then the . fails against the \n.  But the next thing we want
+            to do is match the \n against the \n; if we restored the
+            string value, we would be back at the foo.
+            
+            Because this is used only in specific cases, we don't need to
+            check all the things that `on_failure_jump' does, to make
+            sure the right things get saved on the stack.  Hence we don't
+            share its code.  The only reason to push anything on the
+            stack at all is that otherwise we would have to change
+            `anychar's code to do something besides goto fail in this
+            case; that seems worse than this.  */
+         case on_failure_keep_string_jump:
+           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+           
+           EXTRACT_NUMBER_AND_INCR (mcnt, p);
+           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+ 
+           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+           PUSH_FAILURE_POINT2(p + mcnt, NULL, -2);
+           break;
+ 
+ 
+ 	/* Uses of on_failure_jump:
+         
+            Each alternative starts with an on_failure_jump that points
+            to the beginning of the next alternative.  Each alternative
+            except the last ends with a jump that in effect jumps past
+            the rest of the alternatives.  (They really jump to the
+            ending jump of the following alternative, because tensioning
+            these jumps is a hassle.)
+ 
+            Repeats start with an on_failure_jump that points past both
+            the repetition text and either the following jump or
+            pop_failure_jump back to this on_failure_jump.  */
+ 	case on_failure_jump:
+         on_failure:
+           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+ 
+           EXTRACT_NUMBER_AND_INCR (mcnt, p);
+           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+ 
+           /* If this on_failure_jump comes right before a group (i.e.,
+              the original * applied to a group), save the information
+              for that group and all inner ones, so that if we fail back
+              to this point, the group's information will be correct.
+              For example, in \(a*\)*\1, we need the preceding group,
+              and in \(\(a*\)b*\)\2, we need the inner group.  */
+ 
+           /* We can't use `p' to check ahead because we push
+              a failure point to `p + mcnt' after we do this.  */
+           p1 = p;
+ 
+           /* We need to skip no_op's before we look for the
+              start_memory in case this on_failure_jump is happening as
+              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+              against aba.  */
+           while (p1 < pend && (re_opcode_t) *p1 == no_op)
+             p1++;
+ 
+           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+             {
+               /* We have a new highest active register now.  This will
+                  get reset at the start_memory we are about to get to,
+                  but we will have saved all the registers relevant to
+                  this repetition op, as described above.  */
+               highest_active_reg = *(p1 + 1) + *(p1 + 2);
+               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+                 lowest_active_reg = *(p1 + 1);
+             }
+ 
+           DEBUG_PRINT1 (":\n");
+           PUSH_FAILURE_POINT (p + mcnt, d, -2);
+           PUSH_FAILURE_POINT2(p + mcnt, d, -2);
+           break;
+ 
+ 
+         /* A smart repeat ends with `maybe_pop_jump'.
+ 	   We change it to either `pop_failure_jump' or `jump'.  */
+         case maybe_pop_jump:
+           EXTRACT_NUMBER_AND_INCR (mcnt, p);
+           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+           {
+ 	    register unsigned char *p2 = p;
+ 
+             /* Compare the beginning of the repeat with what in the
+                pattern follows its end. If we can establish that there
+                is nothing that they would both match, i.e., that we
+                would have to backtrack because of (as in, e.g., `a*a')
+                then we can change to pop_failure_jump, because we'll
+                never have to backtrack.
+                
+                This is not true in the case of alternatives: in
+                `(a|ab)*' we do need to backtrack to the `ab' alternative
+                (e.g., if the string was `ab').  But instead of trying to
+                detect that here, the alternative has put on a dummy
+                failure point which is what we will end up popping.  */
+ 
+ 	    /* Skip over open/close-group commands.  */
+ 	    while (p2 + 2 < pend
+ 		   && ((re_opcode_t) *p2 == stop_memory
+ 		       || (re_opcode_t) *p2 == start_memory))
+ 	      p2 += 3;			/* Skip over args, too.  */
+ 
+             /* If we're at the end of the pattern, we can change.  */
+             if (p2 == pend)
+ 	      {
+ 		/* Consider what happens when matching ":\(.*\)"
+ 		   against ":/".  I don't really understand this code
+ 		   yet.  */
+   	        p[-3] = (unsigned char) pop_failure_jump;
+                 DEBUG_PRINT1
+                   ("  End of pattern: change to `pop_failure_jump'.\n");
+               }
+ 
+             else if ((re_opcode_t) *p2 == exactn
+ 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ 	      {
+ 		register unsigned short c
+                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ 		p1 = p + mcnt;
+ 
+                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+                    to the `maybe_finalize_jump' of this case.  Examine what 
+                    follows.  */
+                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+                   {
+   		    p[-3] = (unsigned char) pop_failure_jump;
+                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
+                                   c, p1[5]);
+                   }
+                   
+ 		else if ((re_opcode_t) p1[3] == charset
+ 			 || (re_opcode_t) p1[3] == charset_not)
+ 		  {
+ 		    if (ismbchar (c))
+ 		      c = c << 8 | p2[3];
+ 
+ 		    /* `is_in_list()' is TRUE if c would match, which means
+                         that we can't change to pop_failure_jump.  */
+ 		    if (!is_in_list (c, p1 + 4))
+                       {
+   		        p[-3] = (unsigned char) pop_failure_jump;
+                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                       }
+ 		  }
+ 	      }
+ 	  }
+ 	  p -= 2;		/* Point at relative address again.  */
+ 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
+ 	    {
+ 	      p[-1] = (unsigned char) jump;
+               DEBUG_PRINT1 ("  Match => jump.\n");
+ 	      goto unconditional_jump;
+ 	    }
+         /* Note fall through.  */
+ 
+ 
+ 	/* The end of a simple repeat has a pop_failure_jump back to
+            its matching on_failure_jump, where the latter will push a
+            failure point.  The pop_failure_jump takes off failure
+            points put on by this pop_failure_jump's matching
+            on_failure_jump; we got through the pattern to here from the
+            matching on_failure_jump, so didn't fail.  */
+         case pop_failure_jump:
+           {
+             /* We need to pass separate storage for the lowest and
+                highest registers, even though we don't care about the
+                actual values.  Otherwise, we will restore only one
+                register from the stack, since lowest will == highest in
+                `pop_failure_point'.  */
+             active_reg_t dummy_low_reg, dummy_high_reg;
+             unsigned char *pdummy;
+             const char *sdummy;
+ 
+             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+             POP_FAILURE_POINT (sdummy, pdummy,
+                                dummy_low_reg, dummy_high_reg,
+                                reg_dummy, reg_dummy, reg_info_dummy);
+           }
+           /* Note fall through.  */
+ 
+           
+         /* Unconditionally jump (without popping any failure points).  */
+         case jump:
+ 	unconditional_jump:
+ 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
+           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ 	  p += mcnt;				/* Do the jump.  */
+           DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ 	  break;
+ 
+ 	
+         /* We need this opcode so we can detect where alternatives end
+            in `group_match_null_string_p' et al.  */
+         case jump_past_alt:
+           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+           goto unconditional_jump;
+ 
+ 
+         /* Normally, the on_failure_jump pushes a failure point, which
+            then gets popped at pop_failure_jump.  We will end up at
+            pop_failure_jump, also, and with a pattern of, say, `a+', we
+            are skipping over the on_failure_jump, so we have to push
+            something meaningless for pop_failure_jump to pop.  */
+         case dummy_failure_jump:
+           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+           /* It doesn't matter what we push for the string here.  What
+              the code at `fail' tests is the value for the pattern.  */
+           PUSH_FAILURE_POINT (0, 0, -2);
+           PUSH_FAILURE_POINT2(0, 0, -2);
+           goto unconditional_jump;
+ 
+ 
+         /* At the end of an alternative, we need to push a dummy failure
+            point in case we are followed by a `pop_failure_jump', because
+            we don't want the failure point for the alternative to be
+            popped.  For example, matching `(a|ab)*' against `aab'
+            requires that we match the `ab' alternative.  */
+         case push_dummy_failure:
+           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+           /* See comments just above at `dummy_failure_jump' about the
+              two zeroes.  */
+           PUSH_FAILURE_POINT (0, 0, -2);
+           PUSH_FAILURE_POINT2(0, 0, -2);
+           break;
+ 
+         /* Have to succeed matching what follows at least n times.
+            After that, handle like `on_failure_jump'.  */
+         case succeed_n: 
+           EXTRACT_NUMBER (mcnt, p + 2);
+           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+ 
+           assert (mcnt >= 0);
+           /* Originally, this is how many times we HAVE to succeed.  */
+           if (mcnt > 0)
+             {
+                mcnt--;
+ 	       p += 2;
+                STORE_NUMBER_AND_INCR (p, mcnt);
+                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
+             }
+ 	  else if (mcnt == 0)
+             {
+               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
+ 	      p[2] = (unsigned char) no_op;
+               p[3] = (unsigned char) no_op;
+               goto on_failure;
+             }
+           break;
+         
+         case jump_n: 
+           EXTRACT_NUMBER (mcnt, p + 2);
+           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+ 
+           /* Originally, this is how many times we CAN jump.  */
+           if (mcnt)
+             {
+                mcnt--;
+                STORE_NUMBER (p + 2, mcnt);
+ 	       goto unconditional_jump;	     
+             }
+           /* If don't have to jump any more, skip over the rest of command.  */
+ 	  else      
+ 	    p += 4;		     
+           break;
+         
+ 	case set_number_at:
+ 	  {
+             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+ 
+             EXTRACT_NUMBER_AND_INCR (mcnt, p);
+             p1 = p + mcnt;
+             EXTRACT_NUMBER_AND_INCR (mcnt, p);
+             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
+ 	    STORE_NUMBER (p1, mcnt);
+             break;
+           }
+ 
+         case wordbound:
+           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+           if (AT_WORD_BOUNDARY (d))
+ 	    break;
+           goto fail;
+ 
+ 	case notwordbound:
+           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ 	  if (AT_WORD_BOUNDARY (d))
+ 	    goto fail;
+           break;
+ 
+ 	case wordbeg:
+           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ 	    break;
+           goto fail;
+ 
+ 	case wordend:
+           DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ 	    break;
+           goto fail;
+ 
+ #ifdef emacs
+ #ifdef emacs19
+   	case before_dot:
+           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+   	    goto fail;
+   	  break;
+   
+   	case at_dot:
+           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
+   	    goto fail;
+   	  break;
+   
+   	case after_dot:
+           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+   	    goto fail;
+   	  break;
+ #else /* not emacs19 */
+ 	case at_dot:
+           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+ 	    goto fail;
+ 	  break;
+ #endif /* not emacs19 */
+ 
+ 	case syntaxspec:
+           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ 	  mcnt = *p++;
+ 	  goto matchsyntax;
+ 
+         case wordchar:
+           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ 	  mcnt = (int) Sword;
+         matchsyntax:
+ 	  PREFETCH ();
+ 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+             goto fail;
+           SET_REGS_MATCHED ();
+ 	  break;
+ 
+ 	case notsyntaxspec:
+           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ 	  mcnt = *p++;
+ 	  goto matchnotsyntax;
+ 
+         case notwordchar:
+           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ 	  mcnt = (int) Sword;
+         matchnotsyntax:
+ 	  PREFETCH ();
+ 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+             goto fail;
+ 	  SET_REGS_MATCHED ();
+           break;
+ 
+ #else /* not emacs */
+ 	case wordchar:
+           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ 	  PREFETCH ();
+           if (!WORDCHAR_P (d))
+             goto fail;
+ 	  SET_REGS_MATCHED ();
+           d++;
+ 	  break;
+ 	  
+ 	case notwordchar:
+           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ 	  PREFETCH ();
+ 	  if (WORDCHAR_P (d))
+             goto fail;
+           SET_REGS_MATCHED ();
+           d++;
+ 	  break;
+ #endif /* not emacs */
+           
+         default:
+           abort ();
+ 	}
+       continue;  /* Successfully executed one pattern command; keep going.  */
+ 
+ 
+     /* We goto here if a matching operation fails. */
+     fail:
+       if (!FAIL_STACK_EMPTY ())
+ 	{ /* A restart point is known.  Restore to that state.  */
+           DEBUG_PRINT1 ("\nFAIL:\n");
+           POP_FAILURE_POINT (d, p,
+                              lowest_active_reg, highest_active_reg,
+                              regstart, regend, reg_info);
+ 
+           /* If this failure point is a dummy, try the next one.  */
+           if (!p)
+ 	    goto fail;
+ 
+           /* If we failed to the end of the pattern, don't examine *p.  */
+ 	  assert (p <= pend);
+           if (p < pend)
+             {
+               boolean is_a_jump_n = false;
+               
+               /* If failed to a backwards jump that's part of a repetition
+                  loop, need to pop this failure point and use the next one.  */
+               switch ((re_opcode_t) *p)
+                 {
+                 case jump_n:
+                   is_a_jump_n = true;
+                 case maybe_pop_jump:
+                 case pop_failure_jump:
+                 case jump:
+                   p1 = p + 1;
+                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                   p1 += mcnt;	
+ 
+                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                       || (!is_a_jump_n
+                           && (re_opcode_t) *p1 == on_failure_jump))
+                     goto fail;
+                   break;
+                 default:
+                   /* do nothing */ ;
+                 }
+             }
+ 
+           if (d >= string1 && d <= end1)
+ 	    dend = end_match_1;
+         }
+       else
+         break;   /* Matching at this starting point really fails.  */
+     } /* for (;;) */
+ 
+   if (best_regs_set)
+     goto restore_best_regs;
+ 
+   FREE_VARIABLES ();
+ 
+   return -1;         			/* Failure to match.  */
+ } /* re_match_2 */
+ 
+ /* Subroutine definitions for re_match_2.  */
+ 
+ 
+ /* Decide whether the group whose start_memory arguments begin at *P
+    (*P points at the register number) can match the empty string.
+ 
+    Returns true on reaching a stop_memory when every opcode before it
+    could match nothing; *P is then set to one past that stop_memory's
+    register number.  Returns false as soon as something that cannot
+    match nothing is found, or if END is reached first; in both of
+    those cases *P is left unchanged.
+    We don't handle duplicates properly (yet).  */
+ 
+ static boolean
+ group_match_null_string_p (p, end, reg_info)
+     unsigned char **p, *end;
+     register_info_type *reg_info;
+ {
+   int mcnt;
+   /* Point to after the args to the start_memory.  */
+   unsigned char *p1 = *p + 2;
+   
+   while (p1 < end)
+     {
+       /* Skip over opcodes that can match nothing, and return true or
+ 	 false, as appropriate, when we get to one that can't, or to the
+          matching stop_memory.  */
+       
+       switch ((re_opcode_t) *p1)
+         {
+         /* Could be either a loop or a series of alternatives.  */
+         case on_failure_jump:
+           p1++;  /* Step over the opcode to its jump offset.  */
+           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+           
+           /* If the next operation is not a jump backwards in the
+ 	     pattern.  */
+ 
+ 	  if (mcnt >= 0)
+ 	    {
+               /* Go through the on_failure_jumps of the alternatives,
+                  seeing if any of the alternatives cannot match nothing.
+                  The last alternative starts with only a jump,
+                  whereas the rest start with on_failure_jump and end
+                  with a jump, e.g., here is the pattern for `a|b|c':
+ 
+                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                  /exactn/1/c						
+ 
+                  So, we have to first go through the first (n-1)
+                  alternatives and then deal with the last one separately.  */
+ 
+ 
+               /* Deal with the first (n-1) alternatives, which start
+                  with an on_failure_jump (see above) that jumps to right
+                  past a jump_past_alt.  */
+ 
+               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                 {
+                   /* `mcnt' holds how many bytes long the alternative
+                      is, including the ending `jump_past_alt' and
+                      its number.  */
+ 
+                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 
+ 				                      reg_info))
+                     return false;
+ 
+                   /* Move to right after this alternative, including the
+ 		     jump_past_alt.  */
+                   p1 += mcnt;	
+ 
+                   /* Break if it's the beginning of an n-th alternative
+                      that doesn't begin with an on_failure_jump.  */
+                   if ((re_opcode_t) *p1 != on_failure_jump)
+                     break;
+ 		
+ 		  /* Still have to check that it's not an n-th
+ 		     alternative that starts with an on_failure_jump.  */
+ 		  p1++;
+                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                     {
+ 		      /* Get to the beginning of the n-th alternative.  */
+                       p1 -= 3;
+                       break;
+                     }
+                 }
+ 
+               /* Deal with the last alternative: go back and get number
+                  of the `jump_past_alt' just before it.  `mcnt' contains
+                  the length of the alternative.  */
+               EXTRACT_NUMBER (mcnt, p1 - 2);
+ 
+               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                 return false;
+ 
+               p1 += mcnt;	/* Get past the n-th alternative.  */
+             } /* if mcnt >= 0 */
+           break;
+ 
+           
+         case stop_memory:
+ 	  assert (p1[1] == **p);  /* Same register number as our start_memory.  */
+           *p = p1 + 2;
+           return true;
+ 
+         
+         default: 
+           if (!common_op_match_null_string_p (&p1, end, reg_info))
+             return false;  /* This opcode cannot match nothing.  */
+         }
+     } /* while p1 < end */
+ 
+   return false;  /* Reached END without meeting a stop_memory.  */
+ } /* group_match_null_string_p */
+ 
+ 
+ /* Report whether one alternative can match the empty string.  Like
+    group_match_null_string_p, but does not deal with alternatives:
+    P is the first byte of a single alternative and END is one byte
+    past its last.  The alternative can contain groups.  */
+ 
+ static boolean
+ alt_match_null_string_p (p, end, reg_info)
+     unsigned char *p, *end;
+     register_info_type *reg_info;
+ {
+   unsigned char *scan = p;
+   int offset;
+ 
+   while (scan < end)
+     {
+       if ((re_opcode_t) *scan != on_failure_jump)
+         {
+           /* Let the shared helper judge (and step over) this opcode;
+              one that cannot match nothing settles the answer.  */
+           if (!common_op_match_null_string_p (&scan, end, reg_info))
+             return false;
+           continue;
+         }
+ 
+       /* It's a loop: step over the opcode, read its jump offset, and
+          carry on from the place the jump leads to.  */
+       scan++;
+       EXTRACT_NUMBER_AND_INCR (offset, scan);
+       scan += offset;
+     }
+ 
+   /* Everything up to END could match nothing.  */
+   return true;
+ } /* alt_match_null_string_p */
+ 
+ 
+ /* Handle the opcodes common to group_match_null_string_p and
+    alt_match_null_string_p.  Returns true if the opcode at *P can match
+    the empty string, and then sets *P to one past the opcode and its
+    arguments (or to a jump target); on false, *P is left unchanged.  */
+ 
+ static boolean
+ common_op_match_null_string_p (p, end, reg_info)
+     unsigned char **p, *end;
+     register_info_type *reg_info;
+ {
+   int mcnt;
+   boolean ret;
+   int reg_no;
+   unsigned char *p1 = *p;
+ 
+   switch ((re_opcode_t) *p1++)  /* p1 now addresses the byte after the opcode.  */
+     {
+     case no_op:
+     case begline:
+     case endline:
+     case begbuf:
+     case endbuf:
+     case wordbeg:
+     case wordend:
+     case wordbound:
+     case notwordbound:
+ #ifdef emacs
+     case before_dot:
+     case at_dot:
+     case after_dot:
+ #endif
+       break;  /* Nothing to skip beyond the opcode byte itself.  */
+ 
+     case start_memory:
+       reg_no = *p1;
+       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+       ret = group_match_null_string_p (&p1, end, reg_info);
+       /* When RET is true, p1 has been moved past the group's stop_memory.  */
+       /* Have to set this here in case we're checking a group which
+          contains a group and a back reference to it.  */
+ 
+       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+ 
+       if (!ret)
+         return false;
+       break;
+           
+     /* If this is an optimized succeed_n for zero times, make the jump.  */
+     case jump:
+       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+       if (mcnt >= 0)
+         p1 += mcnt;
+       else
+         return false;  /* A backward jump is treated as unable to match nothing.  */
+       break;
+ 
+     case succeed_n:
+       /* Get to the number of times to succeed.  */
+       p1 += 2;		
+       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ 
+       if (mcnt == 0)
+         {
+           p1 -= 4;  /* Back to the first argument, the jump offset.  */
+           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+           p1 += mcnt;
+         }
+       else
+         return false;
+       break;
+ 
+     case duplicate: 
+       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+         return false;
+       break;  /* NOTE(review): p1 is not stepped past the register byte -- confirm.  */
+ 
+     case set_number_at:
+       p1 += 4;
+       /* NOTE(review): falls through, so this p1 is never stored to *P.  */
+     default:
+       /* All other opcodes mean we cannot match the empty string.  */
+       return false;
+   }
+ 
+   *p = p1;
+   return true;
+ } /* common_op_match_null_string_p */
+ 
+ 
+ /* Return zero if S1 and S2 agree over LEN bytes, nonzero otherwise.
+    Bytes are compared through TRANSLATE, except that a byte satisfying
+    ismbchar is compared, with the byte after it, untranslated.  */
+ static int
+ bcmp_translate (s1, s2, len, translate)
+      const char *s1, *s2;
+      register int len;
+      char *translate;
+ {
+   register const unsigned char *lhs = (const unsigned char *) s1;
+   register const unsigned char *rhs = (const unsigned char *) s2;
+ 
+   for (; len; len--)
+     {
+       unsigned char lead = *lhs++;
+       if (!ismbchar (lead))
+         {
+           if (translate[lead] != translate[*rhs++])
+             return 1;
+           continue;
+         }
+       /* Lead bytes, the remaining LEN and second bytes must all agree.  */
+       if (lead != *rhs++ || --len == 0 || *lhs++ != *rhs++)
+         return 1;
+     }
+   return 0;
+ }
+ 
+ /* Entry points for GNU code.  */
+ 
+ /* re_compile_pattern is the GNU regular expression compiler: it
+    compiles PATTERN (of length SIZE) and puts the result in BUFP.
+    Returns 0 if the pattern was valid, otherwise an error string.
+    
+    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+    are set in BUFP on entry.
+    
+    We call regex_compile to do the actual compilation.  */
+ 
+ const char *
+ re_compile_pattern (pattern, length, bufp)
+      const char *pattern;
+      size_t length;
+      struct re_pattern_buffer *bufp;
+ {
+   reg_errcode_t status;
+ 
+   /* Match anchors at newline.  */
+   bufp->newline_anchor = 1;
+ 
+   /* GNU code decides whether register information is wanted by
+      whether a null REGS argument is passed to re_match, etc., and
+      not by setting no_sub, so no_sub is always cleared here.  */
+   bufp->no_sub = 0;
+ 
+   /* GNU code is written to assume at least RE_NREGS registers will
+      be set (and at least one extra will be -1), so the register
+      arrays are marked as not yet allocated.  */
+   bufp->regs_allocated = REGS_UNALLOCATED;
+ 
+   status = regex_compile (pattern, length, re_syntax_options, bufp);
+   return re_error_msg[(int) status];
+ }
+ 
+ /* Entry points compatible with 4.2 BSD regex library.  We don't define
+    them if this is an Emacs or POSIX compilation.  */
+ 
+ #if !defined (emacs) && !defined (_POSIX_SOURCE)
+ 
+ /* The BSD interface works on one, and only one, pattern buffer.  */
+ static struct re_pattern_buffer re_comp_buf;
+ 
+ char *
+ re_comp (s)
+     const char *s;
+ {
+   reg_errcode_t status;
+ 
+   /* A null S compiles nothing; it is an error only when no pattern
+      buffer has been set up by an earlier call.  */
+   if (s == NULL)
+     return re_comp_buf.buffer ? 0 : "No previous regular expression";
+ 
+   /* First use: obtain the initial pattern space and the fastmap.  */
+   if (re_comp_buf.buffer == NULL)
+     {
+       unsigned char *space = (unsigned char *) malloc (200);
+       if (space == NULL)
+         return "Memory exhausted";
+       re_comp_buf.buffer = space;
+       re_comp_buf.allocated = 200;
+ 
+       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+       if (re_comp_buf.fastmap == NULL)
+         return "Memory exhausted";
+     }
+ 
+   /* `re_exec' always passes NULL for the `regs' argument, so the
+      pattern buffer fields which affect it need no initialization.  */
+ 
+   /* Anchors match at newlines.  */
+   re_comp_buf.newline_anchor = 1;
+ 
+   status = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+ 
+   /* The cast below deliberately discards `const'.  */
+   return (char *) re_error_msg[(int) status];
+ }
+ 
+ 
+ int
+ re_exec (s)
+     const char *s;
+ {
+   const int n = strlen (s);	/* Search across the whole of S.  */
+   int hit = re_search (&re_comp_buf, s, n, 0, n, (struct re_registers *) 0);
+   return hit >= 0;
+ }
+ #endif /* not emacs and not _POSIX_SOURCE */
+ 
+ /* POSIX.2 functions.  Don't define these for Emacs.  */
+ 
+ #ifndef emacs
+ 
+ /* regcomp takes a regular expression as a string and compiles it.
+ 
+    PREG is a regex_t *.  We do not expect any fields to be initialized,
+    since POSIX says we shouldn't.  Thus, we set
+ 
+      `buffer' to the compiled pattern;
+      `used' to the length of the compiled pattern;
+      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+        REG_EXTENDED bit in CFLAGS is set; otherwise, to
+        RE_SYNTAX_POSIX_BASIC;
+      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+      `fastmap' and `fastmap_accurate' to zero;
+      `re_nsub' to the number of subexpressions in PATTERN.
+ 
+    PATTERN is the address of the pattern string.
+ 
+    CFLAGS is a series of bits which affect compilation.
+ 
+      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+      use POSIX basic syntax.
+ 
+      If REG_NEWLINE is set, then . and [^...] don't match newline.
+      Also, regexec will try a match beginning after every newline.
+ 
+      If REG_ICASE is set, then we consider upper- and lowercase
+      versions of letters to be equivalent when matching.
+ 
+      If REG_NOSUB is set, then when PREG is passed to regexec, that
+      routine will report only success or failure, and nothing about the
+      registers.
+ 
+    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+    the return codes and their meanings.)  */
+ 
+ int
+ regcomp (preg, pattern, cflags)
+     regex_t *preg;
+     const char *pattern;
+     int cflags;
+ {
+   reg_errcode_t status;
+   reg_syntax_t syntax;
+ 
+   /* REG_EXTENDED chooses between the two POSIX syntaxes.  */
+   if (cflags & REG_EXTENDED)
+     syntax = RE_SYNTAX_POSIX_EXTENDED;
+   else
+     syntax = RE_SYNTAX_POSIX_BASIC;
+ 
+   /* Start from an empty buffer; regex_compile will allocate the space
+      for the compiled pattern.  We also don't bother with a fastmap
+      when searching.  This simplifies the REG_NEWLINE case: with a
+      fastmap we'd have to put all the characters after newlines into
+      it.  This way, we just try every character.  */
+   preg->buffer = 0;
+   preg->allocated = 0;
+   preg->used = 0;
+   preg->fastmap = 0;
+ 
+   if (!(cflags & REG_ICASE))
+     preg->translate = NULL;
+   else
+     {
+       unsigned ch;
+       preg->translate = (char *) malloc (CHAR_SET_SIZE);
+       if (preg->translate == NULL)
+         return (int) REG_ESPACE;
+ 
+       /* Uppercase characters map to the corresponding lowercase
+          ones; every other character maps to itself.  */
+       for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+         preg->translate[ch] = ISUPPER (ch) ? tolower (ch) : ch;
+     }
+ 
+   /* REG_NEWLINE implies neither . nor [^...] match newline, and it
+      also changes the matching behavior.  */
+   if (cflags & REG_NEWLINE)
+     {
+       syntax = (syntax & ~RE_DOT_NEWLINE) | RE_HAT_LISTS_NOT_NEWLINE;
+       preg->newline_anchor = 1;
+     }
+   else
+     preg->newline_anchor = 0;
+ 
+   preg->no_sub = !!(cflags & REG_NOSUB);
+ 
+   /* POSIX says a null character in the pattern terminates it, so we
+      can use strlen here in compiling the pattern.  */
+   status = regex_compile (pattern, strlen (pattern), syntax, preg);
+ 
+   /* POSIX doesn't distinguish between an unmatched open-group and an
+      unmatched close-group: both are REG_EPAREN.  */
+   return (int) (status == REG_ERPAREN ? REG_EPAREN : status);
+ }
+ 
+ 
+ /* regexec searches for a given pattern, specified by PREG, in the
+    string STRING.
+ 
+    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+    least NMATCH elements, and we set them to the offsets of the
+    corresponding matched substrings.
+ 
+    EFLAGS specifies `execution flags' which affect matching: if
+    REG_NOTBOL is set, then ^ does not match at the beginning of the
+    string; if REG_NOTEOL is set, then $ does not match at the end.
+    We return 0 if we find a match and REG_NOMATCH if not; REG_NOMATCH
+    is also returned when the temporary register arrays can't be had.  */
+ 
+ int
+ regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *preg;
+     const char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+ {
+   int ret;
+   struct re_registers regs;
+   regex_t private_preg;
+   int len = strlen (string);
+   boolean want_reg_info = !preg->no_sub && nmatch > 0;
+ 
+   private_preg = *preg;  /* Private copy: PREG itself is const.  */
+   private_preg.not_bol = !!(eflags & REG_NOTBOL);
+   private_preg.not_eol = !!(eflags & REG_NOTEOL);
+ 
+   /* `nmatch' says exactly how many registers to return information
+      about; we have to pass that on to the matching routines.  */
+   private_preg.regs_allocated = REGS_FIXED;
+ 
+   if (want_reg_info)
+     {
+       regs.num_regs = nmatch;
+       regs.start = TALLOC (nmatch, regoff_t);
+       regs.end = TALLOC (nmatch, regoff_t);
+       if (regs.start == NULL || regs.end == NULL)
+         {
+           /* Don't leak the one that succeeded (old frees may reject NULL).  */
+           if (regs.start != NULL) free (regs.start);
+           if (regs.end != NULL) free (regs.end);
+           return (int) REG_NOMATCH;
+         }
+     }
+ 
+   /* Perform the searching operation.  */
+   ret = re_search (&private_preg, string, len,
+                    /* start: */ 0, /* range: */ len,
+                    want_reg_info ? &regs : (struct re_registers *) 0);
+ 
+   /* Copy the register information to the POSIX structure.  */
+   if (want_reg_info)
+     {
+       if (ret >= 0)
+         {
+           unsigned r;
+           for (r = 0; r < nmatch; r++)
+             {
+               pmatch[r].rm_so = regs.start[r];
+               pmatch[r].rm_eo = regs.end[r];
+             }
+         }
+       free (regs.start);  /* The temporary register arrays are done with.  */
+       free (regs.end);
+     }
+ 
+   /* We want zero return to mean success, unlike `re_search'.  */
+   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+ }
+ 
+ 
+ /* Returns a message corresponding to an error code, ERRCODE, returned
+    from either regcomp or regexec.   We don't use PREG here.  */
+ 
+ size_t
+ regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     const regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+ {
+   const char *text;
+   size_t needed;
+ 
+   /* Codes outside the `re_error_msg' table can only come from a bug in
+      the caller or in the regex code itself, so dump core rather than
+      return a bogus message.  */
+   if (errcode < 0
+       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+     abort ();
+ 
+   /* A null table entry gets the text "Success"; POSIX does not
+      require any message there.  */
+   text = re_error_msg[errcode];
+   if (text == NULL)
+     text = "Success";
+ 
+   needed = strlen (text) + 1; /* Count the terminating null.  */
+ 
+   /* Copy as much as fits, always leaving ERRBUF null-terminated; a
+      zero-sized buffer is left untouched.  */
+   if (errbuf_size != 0)
+     {
+       if (needed <= errbuf_size)
+         strcpy (errbuf, text);
+       else
+         {
+           strncpy (errbuf, text, errbuf_size - 1);
+           errbuf[errbuf_size - 1] = 0;
+         }
+     }
+ 
+   return needed;
+ }
+ 
+ 
+ /* Free dynamically allocated space used by PREG.  */
+ 
+ void
+ regfree (preg)
+     regex_t *preg;
+ {
+   /* Release the compiled pattern and reset its size bookkeeping.  */
+   if (preg->buffer)
+     free (preg->buffer);
+   preg->buffer = NULL;
+   preg->allocated = 0;
+   preg->used = 0;
+   /* Release the fastmap and mark it as no longer accurate.  */
+   if (preg->fastmap)
+     free (preg->fastmap);
+   preg->fastmap_accurate = 0;
+   preg->fastmap = NULL;
+   /* Release the translate table.  */
+   if (preg->translate)
+     free (preg->translate);
+   preg->translate = NULL;
+ }
+ 
+ #endif /* not emacs  */
+ 
+ /*
+ Local variables:
+ make-backup-files: t
+ version-control: t
+ trim-versions-without-asking: nil
+ End:
+ */
diff -crP php-2.0.1/src/jp.regex/regex.h php-2.0.1.jp_urat-5.3/src/jp.regex/regex.h
*** php-2.0.1/src/jp.regex/regex.h	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/jp.regex/regex.h	Wed Feb 18 21:03:42 1998
***************
*** 0 ****
--- 1,505 ----
+ /* Definitions for data structures and routines for the regular
+    expression library, version 0.12.
+ 
+    Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+ 
+ #ifndef __REGEXP_LIBRARY_H__
+ #define __REGEXP_LIBRARY_H__
+ 
+ /* POSIX says that <sys/types.h> must be included (by the caller) before
+    <regex.h>.  */
+ 
+ #ifdef VMS
+ /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+    should be there.  */
+ #include <stddef.h>
+ #endif
+ 
+ 
+ /* The following two types have to be signed and unsigned integer type
+    wide enough to hold a value of a pointer.  For most ANSI compilers
+    ptrdiff_t and size_t should be likely OK.  Still size of these two
+    types is 2 for Microsoft C.  Ugh... */
+ typedef long s_reg_t;
+ typedef unsigned long active_reg_t;
+ 
+ /* The following bits are used to determine the regexp syntax we
+    recognize.  The set/not-set meanings are chosen so that Emacs syntax
+    remains the value 0.  The bits are given in alphabetical order, and
+    the definitions shifted by one from the previous bit; thus, when we
+    add or remove a bit, only one other definition need change.  */
+ typedef unsigned long reg_syntax_t;
+ 
+ /* If this bit is not set, then \ inside a bracket expression is literal.
+    If set, then such a \ quotes the following character.  */
+ #define RE_BACKSLASH_ESCAPE_IN_LISTS (1L)
+ 
+ /* If this bit is not set, then + and ? are operators, and \+ and \? are
+      literals. 
+    If set, then \+ and \? are operators and + and ? are literals.  */
+ #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+ 
+ /* If this bit is set, then character classes are supported.  They are:
+      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+    If not set, then character classes are not supported.  */
+ #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+ 
+ /* If this bit is set, then ^ and $ are always anchors (outside bracket
+      expressions, of course).
+    If this bit is not set, then it depends:
+         ^  is an anchor if it is at the beginning of a regular
+            expression or after an open-group or an alternation operator;
+         $  is an anchor if it is at the end of a regular expression, or
+            before a close-group or an alternation operator.  
+ 
+    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+    POSIX draft 11.2 says that * etc. in leading positions is undefined.
+    We already implemented a previous draft which made those constructs
+    invalid, though, so we haven't changed the code back.  */
+ #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+ 
+ /* If this bit is set, then special characters are always special
+      regardless of where they are in the pattern.
+    If this bit is not set, then special characters are special only in
+      some contexts; otherwise they are ordinary.  Specifically, 
+      * + ? and intervals are only special when not after the beginning,
+      open-group, or alternation operator.  */
+ #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+ 
+ /* If this bit is set, then *, +, ?, and { cannot be first in an re or
+      immediately after an alternation or begin-group operator.  */
+ #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+ 
+ /* If this bit is set, then . matches newline.
+    If not set, then it doesn't.  */
+ #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+ 
+ /* If this bit is set, then . doesn't match NUL.
+    If not set, then it does.  */
+ #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+ 
+ /* If this bit is set, nonmatching lists [^...] do not match newline.
+    If not set, they do.  */
+ #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+ 
+ /* If this bit is set, either \{...\} or {...} defines an
+      interval, depending on RE_NO_BK_BRACES. 
+    If not set, \{, \}, {, and } are literals.  */
+ #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+ 
+ /* If this bit is set, +, ? and | aren't recognized as operators.
+    If not set, they are.  */
+ #define RE_LIMITED_OPS (RE_INTERVALS << 1)
+ 
+ /* If this bit is set, newline is an alternation operator.
+    If not set, newline is literal.  */
+ #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+ 
+ /* If this bit is set, then `{...}' defines an interval, and \{ and \}
+      are literals.
+   If not set, then `\{...\}' defines an interval.  */
+ #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+ 
+ /* If this bit is set, (...) defines a group, and \( and \) are literals.
+    If not set, \(...\) defines a group, and ( and ) are literals.  */
+ #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+ 
+ /* If this bit is set, then \<digit> matches <digit>.
+    If not set, then \<digit> is a back-reference.  */
+ #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+ 
+ /* If this bit is set, then | is an alternation operator, and \| is literal. 
+    If not set, then \| is an alternation operator, and | is literal.  */
+ #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+ 
+ /* If this bit is set, then an ending range point collating lower
+      than the starting range point, as in [z-a], is invalid.
+    If not set, then when ending range point collates lower than the
+      starting range point, the range is ignored.  */
+ #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+ 
+ /* If this bit is set, then an unmatched ) is ordinary.
+    If not set, then an unmatched ) is invalid.  */
+ #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+ 
+ /* If this bit is set, do not process the GNU regex operators.
+    If not set, then the GNU regex operators are recognized. */
+ #define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+ 
+ /* This global variable defines the particular regexp syntax to use (for
+    some interfaces).  When a regexp is compiled, the syntax used is
+    stored in the pattern buffer, so changing this does not affect
+    already-compiled regexps.  */
+ extern reg_syntax_t re_syntax_options;
+ 
+ /* Define combinations of the above bits for the standard possibilities.
+    (The [[[ comments delimit what gets put into the Texinfo file, so
+    don't delete them!)  */ 
+ /* [[[begin syntaxes]]] */
+ #define RE_SYNTAX_EMACS 0
+ 
+ #define RE_SYNTAX_AWK							\
+   (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
+    | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
+    | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
+    | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+ 
+ #define RE_SYNTAX_GNU_AWK 						\
+   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+ 
+ #define RE_SYNTAX_POSIX_AWK 						\
+   (RE_SYNTAX_GNU_AWK | RE_NO_GNU_OPS)
+ 
+ #define RE_SYNTAX_GREP							\
+   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+    | RE_NEWLINE_ALT)
+ 
+ #define RE_SYNTAX_EGREP							\
+   (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+    | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+    | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+    | RE_NO_BK_VBAR)
+ 
+ #define RE_SYNTAX_POSIX_EGREP						\
+   (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+ 
+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+ 
+ #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+ 
+ /* Syntax bits common to both basic and extended POSIX regex syntax.  */
+ #define _RE_SYNTAX_POSIX_COMMON						\
+   (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+ 
+ #define RE_SYNTAX_POSIX_BASIC						\
+   (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+ 
+ /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+    RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+    isn't minimal, since other operators, such as \`, aren't disabled.  */
+ #define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+ 
+ #define RE_SYNTAX_POSIX_EXTENDED					\
+   (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
+    | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
+    | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
+    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ 
+ /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+    replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ /* [[[end syntaxes]]] */
+ 
+ /* Maximum number of duplicates an interval can allow.  Some systems
+    (erroneously) define this in other header files, but we want our
+    value, so remove any previous define.  */
+ #ifdef RE_DUP_MAX
+ #undef RE_DUP_MAX
+ #endif
+ /* if sizeof(int) == 2, then ((1 << 15) - 1) overflows  */
+ #define RE_DUP_MAX  (0x7fff)
+ 
+ 
+ /* POSIX `cflags' bits (i.e., information for `regcomp').  */
+ 
+ /* If this bit is set, then use extended regular expression syntax.
+    If not set, then use basic regular expression syntax.  */
+ #define REG_EXTENDED 1
+ 
+ /* If this bit is set, then ignore case when matching.
+    If not set, then case is significant.  */
+ #define REG_ICASE (REG_EXTENDED << 1)
+  
+ /* If this bit is set, then anchors do not match at newline
+      characters in the string.
+    If not set, then anchors do match at newlines.  */
+ #define REG_NEWLINE (REG_ICASE << 1)
+ 
+ /* If this bit is set, then report only success or fail in regexec.
+    If not set, then returns differ between not matching and errors.  */
+ #define REG_NOSUB (REG_NEWLINE << 1)
+ 
+ 
+ /* POSIX `eflags' bits (i.e., information for regexec).  */
+ 
+ /* If this bit is set, then the beginning-of-line operator doesn't match
+      the beginning of the string (presumably because it's not the
+      beginning of a line).
+    If not set, then the beginning-of-line operator does match the
+      beginning of the string.  */
+ #define REG_NOTBOL 1
+ 
+ /* Like REG_NOTBOL, except for the end-of-line.  */
+ #define REG_NOTEOL (1 << 1)
+ 
+ 
+ /* If any error codes are removed, changed, or added, update the
+    `re_error_msg' table in regex.c.  */
+ typedef enum
+ {
+   REG_NOERROR = 0,	/* Success.  */
+   REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+ 
+   /* POSIX regcomp return error codes.  (In the order listed in the
+      standard.)  */
+   REG_BADPAT,		/* Invalid pattern.  */
+   REG_ECOLLATE,		/* Not implemented.  */
+   REG_ECTYPE,		/* Invalid character class name.  */
+   REG_EESCAPE,		/* Trailing backslash.  */
+   REG_ESUBREG,		/* Invalid back reference.  */
+   REG_EBRACK,		/* Unmatched left bracket.  */
+   REG_EPAREN,		/* Parenthesis imbalance.  */ 
+   REG_EBRACE,		/* Unmatched \{.  */
+   REG_BADBR,		/* Invalid contents of \{\}.  */
+   REG_ERANGE,		/* Invalid range end.  */
+   REG_ESPACE,		/* Ran out of memory.  */
+   REG_BADRPT,		/* No preceding re for repetition op.  */
+ 
+   /* Error codes we've added.  */
+   REG_EEND,		/* Premature end.  */
+   REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+   REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+ } reg_errcode_t;
+ 
+ /* This data structure represents a compiled pattern.  Before calling
+    the pattern compiler, the fields `buffer', `allocated', `fastmap',
+    `translate', and `no_sub' can be set.  After the pattern has been
+    compiled, the `re_nsub' field is available.  All other fields are
+    private to the regex routines.  */
+ 
+ struct re_pattern_buffer
+ {
+ /* [[[begin pattern_buffer]]] */
+ 	/* Space that holds the compiled pattern.  It is declared as
+           `unsigned char *' because its elements are
+            sometimes used as array indexes.  */
+   unsigned char *buffer;
+ 
+ 	/* Number of bytes to which `buffer' points.  */
+   unsigned long allocated;
+ 
+ 	/* Number of bytes actually used in `buffer'.  */
+   unsigned long used;	
+ 
+         /* Syntax setting with which the pattern was compiled.  */
+   reg_syntax_t syntax;
+ 
+         /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+            the fastmap, if there is one, to skip over impossible
+            starting points for matches.  */
+   char *fastmap;
+ 
+         /* Either a translate table to apply to all characters before
+            comparing them, or zero for no translation.  The translation
+            is applied to a pattern when it is compiled and to a string
+            when it is matched.  */
+   char *translate;
+ 
+ 	/* Number of subexpressions found by the compiler.  */
+   size_t re_nsub;
+ 
+         /* Zero if this pattern cannot match the empty string, one else.
+            Well, in truth it's used only in `re_search_2', to see
+            whether or not we should use the fastmap, so we don't set
+            this absolutely perfectly; see `re_compile_fastmap' (the
+            `duplicate' case).  */
+   unsigned can_be_null : 1;
+ 
+         /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+              for `max (RE_NREGS, re_nsub + 1)' groups.
+            If REGS_REALLOCATE, reallocate space if necessary.
+            If REGS_FIXED, use what's there.  */
+ #define REGS_UNALLOCATED 0
+ #define REGS_REALLOCATE 1
+ #define REGS_FIXED 2
+   unsigned regs_allocated : 2;
+ 
+         /* Set to zero when `regex_compile' compiles a pattern; set to one
+            by `re_compile_fastmap' if it updates the fastmap.  */
+   unsigned fastmap_accurate : 1;
+ 
+         /* If set, `re_match_2' does not return information about
+            subexpressions.  */
+   unsigned no_sub : 1;
+ 
+         /* If set, a beginning-of-line anchor doesn't match at the
+            beginning of the string.  */ 
+   unsigned not_bol : 1;
+ 
+         /* Similarly for an end-of-line anchor.  */
+   unsigned not_eol : 1;
+ 
+         /* If true, an anchor at a newline matches.  */
+   unsigned newline_anchor : 1;
+ 
+ /* [[[end pattern_buffer]]] */
+ };
+ 
+ typedef struct re_pattern_buffer regex_t;
+ 
+ 
+ /* search.c (search_buffer) in Emacs needs this one opcode value.  It is
+    defined both in `regex.c' and here.  */
+ #define RE_EXACTN_VALUE 1
+ 
+ /* Type for byte offsets within the string.  POSIX mandates this.  */
+ typedef int regoff_t;
+ 
+ 
+ /* This is the structure we store register match data in.  See
+    regex.texinfo for a full description of what registers match.  */
+ struct re_registers
+ {
+   unsigned num_regs;	/* Number of elements in `start' and `end'.  */
+   regoff_t *start;	/* start[i]: offset where register i's match begins.  */
+   regoff_t *end;	/* end[i]: offset where register i's match ends.  */
+ };
+ 
+ 
+ /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+    `re_match_2' returns information about at least this many registers
+    the first time a `regs' structure is passed.  */
+ #ifndef RE_NREGS
+ #define RE_NREGS 30
+ #endif
+ 
+ 
+ /* POSIX specification for registers.  Aside from the different names than
+    `re_registers', POSIX uses an array of structures, instead of a
+    structure of arrays.  */
+ typedef struct
+ {
+   regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+ } regmatch_t;
+ 
+ /* Declarations for routines.  */
+ 
+ /* To avoid duplicating every routine declaration -- once with a
+    prototype (if we are ANSI), and once without (if we aren't) -- we
+    use the following macro to declare argument types.  This
+    unfortunately clutters up the declarations a bit, but I think it's
+    worth it.  */
+ 
+ #ifdef __STDC__
+ 
+ #define _RE_ARGS(args) args
+ 
+ #else /* not __STDC__ */
+ 
+ #define _RE_ARGS(args) ()
+ 
+ #endif /* not __STDC__ */
+ 
+ /* Sets the current default syntax to SYNTAX, and return the old syntax.
+    You can also simply assign to the `re_syntax_options' variable.  */
+ extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+ 
+ /* Compile the regular expression PATTERN, with length LENGTH
+    and syntax given by the global `re_syntax_options', into the buffer
+    BUFFER.  Return NULL if successful, and an error string if not.  */
+ extern const char *re_compile_pattern
+   _RE_ARGS ((const char *pattern, size_t length,
+              struct re_pattern_buffer *buffer));
+ 
+ 
+ /* Compile a fastmap for the compiled pattern in BUFFER; used to
+    accelerate searches.  Return 0 if successful and -2 if there was an
+    internal error.  */
+ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+ 
+ 
+ /* Search in the string STRING (with length LENGTH) for the pattern
+    compiled into BUFFER.  Start searching at position START, for RANGE
+    characters.  Return the starting position of the match, -1 for no
+    match, or -2 for an internal error.  Also return register
+    information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+ extern int re_search
+   _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+             int length, int start, int range, struct re_registers *regs));
+ 
+ 
+ /* Like `re_search', but search in the concatenation of STRING1 and
+    STRING2.  Also, stop searching at index START + STOP.  */
+ extern int re_search_2
+   _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+              int length1, const char *string2, int length2,
+              int start, int range, struct re_registers *regs, int stop));
+ 
+ 
+ /* Like `re_search', but return how many characters in STRING the regexp
+    in BUFFER matched, starting at position START.  */
+ extern int re_match
+   _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+              int length, int start, struct re_registers *regs));
+ 
+ 
+ /* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+ extern int re_match_2 
+   _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+              int length1, const char *string2, int length2,
+              int start, struct re_registers *regs, int stop));
+ 
+ 
+ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+    ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+    for recording register information.  STARTS and ENDS must be
+    allocated with malloc, and must each be at least `NUM_REGS * sizeof
+    (regoff_t)' bytes long.
+ 
+    If NUM_REGS == 0, then subsequent matches should allocate their own
+    register data.
+ 
+    Unless this function is called, the first search or match using
+    PATTERN_BUFFER will allocate its own register data, without
+    freeing the old data.  */
+ extern void re_set_registers
+   _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+              unsigned num_regs, regoff_t *starts, regoff_t *ends));
+ 
+ /* 4.2 bsd compatibility.  */
+ extern char *re_comp _RE_ARGS ((const char *));
+ extern int re_exec _RE_ARGS ((const char *));
+ 
+ /* POSIX compatibility.  */
+ extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+ extern int regexec
+   _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+              regmatch_t pmatch[], int eflags));
+ extern size_t regerror
+   _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+              size_t errbuf_size));
+ extern void regfree _RE_ARGS ((regex_t *preg));
+ 
+ #endif /* not __REGEXP_LIBRARY_H__ */
+ 
+ /*
+ Local variables:
+ make-backup-files: t
+ version-control: t
+ trim-versions-without-asking: nil
+ End:
+ */
Binary files php-2.0.1/src/jp.regex/regex.o and php-2.0.1.jp_urat-5.3/src/jp.regex/regex.o differ
diff -crP php-2.0.1/src/kanjiconv.c php-2.0.1.jp_urat-5.3/src/kanjiconv.c
*** php-2.0.1/src/kanjiconv.c	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/kanjiconv.c	Wed Feb 18 21:03:42 1998
***************
*** 0 ****
--- 1,1079 ----
+ /************************************************************************/
+ /*                                                                      */
+ /* kanjiconv.c                                                          */
+ /*                                                                      */
+ /* by Mitsuhiro Maeda(mitsu@tramp.co.jp)                                */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /* Usage in PHP script :                                                */
+ /*   SetKanjiOutput("<mode>");                                          */
+ /*                                                                      */
+ /* ARGS                                                                 */
+ /*    <mode>  : EUC | SJIS | JIS                                        */
+ /*                                                                      */
+ /* EXAMPLE                                                              */
+ /*   SetKanjiOutput("EUC");                                             */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ #include <stdio.h>
+ #include <string.h>
+ #include "php.h"
+ #include "parse.h"
+ #if APACHE
+ #include "http_protocol.h"
+ #endif
+ 
+ /*
+  * definitions for Kanji conversion
+  */
+ #define ESC 	'\033'
+ 
+ #define KI1 	'$'
+ #define KI2	'@'
+ #define KI3 	'B'
+ 
+ #define KO1 	'('
+ #define KO2	'@'
+ #define KO3	'H'
+ #define KO4 	'B'
+ #define KO5	'J'
+ 
+ #define	KANAIN		0x0e
+ #define	KANAOUT		0x0f
+ 
+ #define	Z_BEGIN		0xa1	/* Zenkaku Begin */
+ #define	Z_END		0xfe	/* Zenkaku END */
+ #define SS2    		0x8e	/* Hankaku Kana */
+ 
+ static void etoj(unsigned char *,const unsigned char *);
+ static void etos(unsigned char *,const unsigned char *);
+ static void jtoe(unsigned char *,const unsigned char *);
+ static void jtos(unsigned char *,const unsigned char *);
+ static void stoe(unsigned char *,const unsigned char *);
+ static void stoj(unsigned char *,const unsigned char *);
+ static void JIS_IBM(register unsigned char, register unsigned char, 
+ 		    register unsigned char *, register unsigned char *);
+ static void JIS_EUC(register unsigned char, register unsigned char,
+ 		    register unsigned char *, register unsigned char *);
+ static void JIS_SJIS(register unsigned char, register unsigned char,
+ 		     register unsigned char *, register unsigned char *);
+ static void sj_to_euc(register unsigned char, register unsigned char,
+ 		      register unsigned char *, register unsigned char *);
+ static void SJIS_JIS(register unsigned char, register unsigned char,
+ 		     register unsigned char *, register unsigned char *);
+ 
+ 
+ /* Kanji Code for output */
+ int output_kanji_code=EUC;
+ 
+ /* Converter lookup, called as (*function_table[dest][src])(dest,src); strcpy entries copy unconverted */
+ static FUNC function_table[4][4]={
+   { (void*)strcpy, (void*)strcpy, (void*)strcpy, (void*)strcpy}, /* dest : NONE */
+   { (void*)strcpy, (void*)strcpy, jtoe, stoe}, /* dest :  EUC */
+   { (void*)strcpy, etoj, (void*)strcpy, stoj}, /* dest :  JIS */
+   { (void*)strcpy, etos, jtos, (void*)strcpy}  /* dest : SJIS */
+ };
+ 
+ /*								*/
+ /*	etoj.c	2.0	62.06.17	by shige@csk.JUNET	*/
+ /*								*/
+ /*	etoj	EUC code convert into JIS code.			*/
+ /*								*/
+ /*		KI	KO					*/
+ /*		^[$B	^[(B	JIS 83				*/
+ /*								*/
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by o3(mitsu@tramp.co.jp)                         */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ /* Convert the NUL-terminated EUC string SRC to JIS in DEST.  Kanji runs
+    are bracketed by ESC $ B ... ESC ( B and half-width kana (SS2 pairs)
+    by KANAIN ... KANAOUT.  DEST is NUL-terminated but not bounds-checked,
+    so the caller must supply enough room.  */
+ static void
+ etoj(unsigned char *dest, const unsigned char *src)
+ {
+   register int stat = 0;	/* 0 ASCII, 1 kanji byte 2 due, 2 between kanji, 3 in kana run */
+   register unsigned char c;
+   int i = 0, pos = 0;		/* read index into src, write index into dest */
+ 
+   if(src==NULL || src[0]=='\0'){
+     dest[0]='\0';
+     return;
+   }
+ 
+   while ((c = src[i++]) != '\0') {
+     switch (stat) {
+     case 0:
+       if(0xa1 <= c && c <= 0xfe) { /* ZENKAKU */
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	dest[pos++]=KI3;
+ 	dest[pos++]=(c & 0x7f);
+ 	stat = 1;
+       }
+       else if(c == SS2) { /* Hankaku Kana */
+ 	dest[pos++]=KANAIN;
+ 	if(src[i] != '\0') /* truncated SS2: never step past the NUL */
+ 	  dest[pos++]=(src[i++] & 0x7f);
+ 	stat = 3;
+       }
+       else /* ASCII */
+ 	dest[pos++]=c;
+       break;
+     case 1:
+       stat = 2;
+       dest[pos++]=(c & 0x7f);
+       break;
+     case 2:
+       if(0xa1 <= c && c <= 0xfe) { /* ZENKAKU */
+ 	dest[pos++]=(c & 0x7f);
+ 	stat = 1;
+       }
+       else {
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	dest[pos++]=KO4;
+ 	i--;
+ 	stat = 0;
+       }
+       break;
+     case 3:				 /* hankaku kana */
+       if(c == SS2) {
+ 	if(src[i] != '\0') /* truncated SS2: never step past the NUL */
+ 	  dest[pos++]=(src[i++] & 0x7f);
+       }
+       else {
+ 	dest[pos++]=KANAOUT;
+ 	i--;
+ 	stat = 0;
+       }
+       break;
+     }
+   }
+   if ( stat == 1 || stat == 2 ) {
+     /* put KANJI-OUT */
+     dest[pos++]=ESC;
+     dest[pos++]=KO1;
+     dest[pos++]=KO4;
+   }
+   else if (stat == 3) 
+     dest[pos++]=KANAOUT;
+ 
+   /* terminate */
+   dest[pos]='\0';
+ }
+ 
+ /*
+  *	EUC --> Shift_JIS  Convert Routine
+  */
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by o3(mitsu@tramp.co.jp)                         */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ /* Convert the 7-bit JIS byte pair JIS1,JIS2 into the Shift_JIS byte
+    pair stored through IBM1 and IBM2.  */
+ static void
+ JIS_IBM(register unsigned char jis1, register unsigned char jis2,
+ 	register unsigned char *ibm1, register unsigned char *ibm2)
+ {
+   if(jis1 >= 0x5f)    jis1 += 0x80; /* rows 0x5f and up land on lead bytes from 0xe0 */
+   if((jis1 % 2) == 0) {
+     *ibm1 = (jis1 - 0x30)/2 + 0x88;
+     *ibm2 = jis2 + 0x7e;
+   }
+   else {
+     if(jis2 >= 0x60)  jis2 = jis2 + 0x01; /* skip 0x7f in the second byte */
+     *ibm1 = (jis1 - 0x31)/2 + 0x89;
+     *ibm2 = jis2 + 0x1f;
+   }
+   *ibm1 &= 0xff;
+   *ibm2 &= 0xff;
+ }
+ 
+ /* Convert the NUL-terminated EUC string SRC to Shift_JIS in DEST.
+    Two-byte kanji go through JIS_IBM; the byte following SS2 (half-width
+    kana) and all other bytes are copied unchanged.  DEST is
+    NUL-terminated but not bounds-checked.  */
+ static void
+ etos(unsigned char *dest, const unsigned char *src)
+ {
+   register int stat = 0;	/* 1 while the second byte of a kanji is pending */
+   register unsigned char c,c1,c2;
+   unsigned char s1,s2;
+   int i = 0, pos = 0;		/* read index into src, write index into dest */
+ 
+   if(src==NULL || src[0]=='\0'){
+     dest[0]='\0';
+     return;
+   }
+ 
+   while ((c = src[i++]) != '\0') {
+     switch (stat) {
+     case 0:
+       if (Z_BEGIN <= c  && c <= Z_END) { /* Zenkaku */
+ 	c1 = c & 0x7f;
+ 	stat = 1;
+       }
+       else if (c == SS2) { /* Hankaku Kana */
+ 	if (src[i] != '\0') /* truncated SS2: never step past the NUL */
+ 	  dest[pos++]=src[i++];
+       }
+       else /* ASCII */
+ 	dest[pos++]=c;
+       break;
+     case 1:
+       stat = 0;
+       c2 = c & 0x7f;
+       JIS_IBM(c1,c2,&s1,&s2); /* Convert JIS to Shift_JIS */
+       dest[pos++]=s1;
+       dest[pos++]=s2;
+       break;
+     }
+   }
+ 
+   /* terminate */
+   dest[pos]='\0';
+ }
+ 
+ /*								*/
+ /*	jtoe.c	2.0	62.06.17	by shige@csk.JUNET	*/
+ /*								*/
+ /*	jtoe	all JIS code convert into EUC code.		*/
+ /*								*/
+ /*		KI	KO					*/
+ /*		^[$@	^[(H	JIS 78  &  (incorrect code)	*/
+ /*		^[$@	^[(J	JIS 78  &  JIS_ROMAN		*/
+ /*		^[$@	^[(B	JIS 78  &  ASCII		*/
+ /*		^[$B	^[(J	JIS 83  &  JIS_ROMAN		*/
+ /*		^[$B	^[(B	JIS 83  &  ASCII		*/
+ /*								*/
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by o3(mitsu@tramp.co.jp)                         */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ static void 
+ JIS_EUC(register unsigned char jis1, register unsigned char jis2,
+ 	register unsigned char *euc1, register unsigned char *euc2)
+ {
+   *euc1 = jis1 | 0x80;	/* EUC bytes are the JIS bytes with the high bit set */
+   *euc2 = jis2 | 0x80;
+ }
+ 
+ static void /* 7-bit JIS (escape-switched) -> EUC; the caller sizes dest */
+ jtoe(unsigned char *dest, const unsigned char *src)
+ {
+   register int stat; /* parser state; each case label below names one */
+   register unsigned char c, c1, c2;
+   unsigned char s1, s2;
+   /* no kanji-mode flags are kept: every call starts in single-byte mode */
+   
+   int i,pos;
+ 
+   if(src==NULL || !strlen(src)){
+     dest[0]='\0';
+     return;
+   }
+   
+   i=0;
+   pos=0;
+   stat = 0;
+   while ((c = src[i++]) != '\0') {
+     switch (stat) {
+     case 0:
+       if (c == ESC) 
+ 	stat = 1;
+       else if(c == KANAIN)
+ 	stat = 6;
+       else
+ 	dest[pos++]=c;
+       break;
+     case 1: /*  ESC ----> ?  */
+       switch(c) {
+       case KO1:
+ 	stat = 2;
+ 	break;
+       case KI1:
+ 	stat = 3;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 2: /* ESC --> KO1 --> ? */
+       switch(c){
+       case KO5: /* ^[(J : KOUT (to JIS_ROMAN) */
+       case KO4: /* ^[(B : KOUT (to ASCII) */
+       case KO3: /* ^[(H : incorrect but used */
+ 	/* Kanji-out always returns to single-byte mode.
+ 	 *
+ 	 * This used to be conditional on kanji-mode flags that were
+ 	 * only set by a preceding kanji-in.  Without one the parser
+ 	 * stayed in state 2 and folded the next byte into a bogus
+ 	 * escape, so "ESC ( B x" was emitted as "ESC ( x".  The
+ 	 * flags were also static, so text that ended in kanji mode
+ 	 * changed how the next, unrelated string was parsed.
+ 	 */
+ 	stat = 0;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	stat = 1;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 3: /* ESC --> KI1 --> ? */
+       switch(c){
+       case KI2: /* ^[$@ : JIS 78 KIN */
+       case KI3: /* ^[$B : JIS 83 KIN */
+ 	/* Both kanji-in sequences select two-byte mode.  JIS 78 and
+ 	   JIS 83 need not be told apart here: the pair-to-EUC
+ 	   mapping is the same and every kanji-out sequence is
+ 	   accepted whichever of the two was used. */
+ 	stat = 4;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	stat = 1;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 4: /* JIS KANJI 1st */
+       if (c == ESC)
+ 	stat = 1;
+       else if (c == KANAIN) ; /* ignored while in kanji mode */
+       else {
+ 	c1 = c;
+ 	stat = 5;
+       }
+       break;
+     case 5: /* JIS KANJI 2nd */
+       c2 = c;
+       JIS_EUC(c1,c2,&s1,&s2);
+       dest[pos++]=s1;
+       dest[pos++]=s2;
+       stat = 4;
+       break;
+     case 6: /* HANKAKU KANA */
+       if(c == KANAIN) ;
+       else if(c == KANAOUT)
+ 	stat = 0;
+       else {
+ 	if((c >=0x21) && (c <= 0x5f))
+ 	  c |= 0x80;
+ 	dest[pos++]=SS2; /* one input byte becomes two output bytes */
+ 	dest[pos++]=c;
+       }
+       break;
+     }
+   }
+ 
+   /* terminate */
+   dest[pos]='\0';
+ }
+ 
+ /*								*/
+ /*	jtos.c	2.0	62.06.17	by shige@csk.JUNET	*/
+ /*								*/
+ /*	jtos	all JIS code convert into SJIS code.		*/
+ /*								*/
+ /*		KI	KO					*/
+ /*		^[$@	^[(H	JIS 78  &  (incorrect code)	*/
+ /*		^[$@	^[(J	JIS 78  &  JIS_ROMAN		*/
+ /*		^[$@	^[(B	JIS 78  &  ASCII		*/
+ /*		^[$B	^[(J	JIS 83  &  JIS_ROMAN		*/
+ /*		^[$B	^[(B	JIS 83  &  ASCII		*/
+ /*								*/
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by Mitsuhiro Maeda(mitsu@tramp.co.jp)            */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ static void /* 7-bit JIS kanji byte pair -> Shift_JIS byte pair */
+ JIS_SJIS(unsigned char jis1, unsigned char jis2,
+ 	 unsigned char *sjis1, unsigned char *sjis2)
+ {
+   /* Adding 0x80 to rows 0x5f and up moves the resulting lead byte up
+      by 0x40, into the second Shift_JIS lead-byte block (0xe0...). */
+   if(jis1 >= 0x5f)
+     jis1 += 0x80;
+   
+   if((jis1 % 2) == 0) { /* even row: trail bytes 0x9f and up */
+     *sjis1 = (unsigned char)((jis1 - 0x30)/2 + 0x88);
+     *sjis2 = (unsigned char)(jis2 + 0x7e);
+   }
+   else { /* odd row: trail bytes 0x40 and up */
+     if(jis2 >= 0x60)
+       jis2 = jis2 + 0x01; /* step over 0x7f, which is never a trail byte */
+     *sjis1 = (unsigned char)((jis1 - 0x31)/2 + 0x89);
+     *sjis2 = (unsigned char)(jis2 + 0x1f);
+   }
+ }
+ 
+ static void /* 7-bit JIS (escape-switched) -> Shift_JIS; the caller sizes dest */
+ jtos(unsigned char *dest, const unsigned char *src)
+ {
+   int stat; /* parser state; each case label below names one */
+   register unsigned char c, c1, c2;
+   unsigned char s1, s2;
+   /* no kanji-mode flags are kept: every call starts in single-byte mode */
+ 
+   int i,pos;
+ 
+   if(src==NULL || !strlen(src)){
+     dest[0]='\0';
+     return;
+   }
+ 
+   i=0;
+   pos=0;
+   stat = 0;
+   while((c = src[i++]) != '\0'){
+     switch (stat) {
+     case 0:
+       if (c == ESC) 
+ 	stat = 1;
+       else if(c == KANAIN)
+ 	stat = 6;
+       else
+ 	dest[pos++]=c;
+       break;
+     case 1: /*  ESC ----> ?  */
+       switch (c) {
+       case KO1:
+ 	stat = 2;
+ 	break;
+       case KI1:
+ 	stat = 3;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 2: /* ESC --> KO1 --> ? */
+       switch(c){
+       case KO5: /* ^[(J : KOUT (to JIS_ROMAN) */
+       case KO4: /* ^[(B : KOUT (to ASCII) */
+       case KO3: /* ^[(H : incorrect but used */
+ 	/* Kanji-out always returns to single-byte mode.
+ 	 *
+ 	 * This used to be conditional on kanji-mode flags that were
+ 	 * only set by a preceding kanji-in.  Without one the parser
+ 	 * stayed in state 2 and folded the next byte into a bogus
+ 	 * escape, so "ESC ( B x" was emitted as "ESC ( x".  The
+ 	 * flags were also static, so text that ended in kanji mode
+ 	 * changed how the next, unrelated string was parsed.
+ 	 */
+ 	stat = 0;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	stat = 1;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KO1;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 3:			/* ESC --> KI1 --> ? */
+       switch(c){
+       case KI2:		/* ^[$@ : JIS 78 KIN */
+       case KI3:		/* ^[$B : JIS 83 KIN */
+ 	/* Both kanji-in sequences select two-byte mode.  JIS 78 and
+ 	   JIS 83 need not be told apart here: the pair-to-SJIS
+ 	   mapping is the same and every kanji-out sequence is
+ 	   accepted whichever of the two was used. */
+ 	stat = 4;
+ 	break;
+       case KANAIN:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	stat = 6;
+ 	break;
+       case ESC:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	stat = 1;
+ 	break;
+       default:
+ 	dest[pos++]=ESC;
+ 	dest[pos++]=KI1;
+ 	dest[pos++]=c;
+ 	stat = 0;
+ 	break;
+       }
+       break;
+     case 4:		/* JIS KANJI 1st */
+       if (c == ESC)
+ 	stat = 1;
+       else if (c == KANAIN) ; /* ignored while in kanji mode */
+       else {
+ 	c1 = c;
+ 	stat = 5;
+       }
+       break;
+     case 5:		/* JIS KANJI 2nd */
+       c2 = c;
+       JIS_SJIS(c1,c2,&s1,&s2);
+       dest[pos++]=s1;
+       dest[pos++]=s2;
+       stat = 4;
+       break;
+     case 6:		/* HANKAKU KANA */
+       if(c == KANAIN) ;
+       else if(c == KANAOUT)
+ 	stat = 0;
+       else {
+ 	if((c >=0x21) && (c <= 0x5f))
+ 	  c |= 0x80; /* Shift_JIS kana is the 7-bit code with bit 7 set */
+ 	dest[pos++]=c;
+       }
+       break;
+     }
+   }
+ 
+   /* terminate */
+   dest[pos]='\0';
+ 
+ }
+ 
+ /*
+  *	Shift_JIS ----> EUC  Convert Routine
+  */
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by o3(mitsu@tramp.co.jp)                         */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ static void /* Shift_JIS byte pair -> EUC byte pair */
+ sj_to_euc(unsigned char sj1, unsigned char sj2,
+ 	  unsigned char *euc1, unsigned char *euc2)
+ {
+   if (sj1 >= 0xe0) /* The 2nd standard of JIS */
+     sj1 -=  0x40; /* close the gap between the two lead-byte blocks */
+   
+   if (sj2 >= 0x9f) { /* upper trail range: second row of this lead byte */
+     *euc1 = (unsigned char)((sj1 - 0x88)*2 + 0xb0);
+     *euc2 = (unsigned char)(sj2 + 0x02);
+   }
+   else { /* lower trail range: first row of this lead byte */
+     if (sj2 >= 0x7f)
+       sj2 -= 0x01; /* 0x7f is skipped in Shift_JIS trail bytes */
+     *euc1 = (unsigned char)((sj1 - 0x89)*2 + 0xb1);
+     *euc2 = (unsigned char)(sj2 + 0x61);
+   }
+ }
+ 
+ static void
+ stoe(unsigned char *dest, const unsigned char *src)
+ {
+   /*
+    * Shift_JIS -> EUC.
+    *
+    * Two-byte Shift_JIS characters (first byte 0x81-0x9f or 0xe0-0xfc)
+    * are mapped with sj_to_euc().  Bytes 0xa1-0xdf (half-width kana)
+    * are written with an SS2 prefix.  Every other byte is copied
+    * unchanged.  The result is NUL terminated.
+    *
+    * dest is not bounds-checked; note that a kana byte turns into two
+    * output bytes.  A NULL or empty src yields an empty string.
+    */
+   unsigned char *out = dest;
+   unsigned char lead = 0; /* pending first byte, 0 when there is none */
+   unsigned char ch, e1, e2;
+ 
+   if(src==NULL || *src=='\0'){
+     *dest='\0';
+     return;
+   }
+ 
+   for(; (ch = *src) != '\0'; src++) {
+     if(lead != 0) {
+       /* second byte of a two-byte character */
+       sj_to_euc(lead,ch,&e1,&e2);
+       *out++ = e1;
+       *out++ = e2;
+       lead = 0;
+       continue;
+     }
+     if((ch>=0x81 && ch<=0x9f) || (ch>=0xe0 && ch<=0xfc)) {
+       lead = ch; /* first byte: wait for its partner */
+       continue;
+     }
+     if(ch>=0xa1 && ch<=0xdf) /* half-width kana */
+       *out++ = SS2;
+     *out++ = ch;
+   }
+ 
+   /* a first byte still pending at the end of src is dropped */
+   *out = '\0';
+ }
+ 
+ /*								*/
+ /*	stoj.c	2.0	62.06.17	by shige@csk.JUNET	*/
+ /*								*/
+ /*	stoj	SJIS code convert into JIS code.		*/
+ /*								*/
+ /*		KI	KO					*/
+ /*		^[$B	^[(B	JIS 83				*/
+ /*								*/
+ 
+ /************************************************************************/
+ /*                                                                      */
+ /* Modified (a lot ;-) by Mitsuhiro Maeda(mitsu@tramp.co.jp)            */
+ /*   adding this to PHP/FI as internal function                         */
+ /*                                                                      */
+ /************************************************************************/
+ 
+ static void /* Shift_JIS byte pair -> 7-bit JIS kanji byte pair */
+ SJIS_JIS(unsigned char sjis1, unsigned char sjis2,
+ 	 unsigned char *jis1, unsigned char *jis2)
+ {
+   if (sjis1 >= 0xe0)	/* The 2nd standard of JIS */
+     sjis1 -=  0x40; /* close the gap between the two lead-byte blocks */
+ 	
+   if (sjis2 >= 0x9f) { /* upper trail range: second row of this lead byte */
+     *jis1 = (unsigned char)((sjis1 - 0x88)*2 + 0x30);
+     *jis2 = (unsigned char)(sjis2 - 0x7e);
+   }
+   else { /* lower trail range: first row of this lead byte */
+     if (sjis2 >= 0x7f)
+       sjis2 -= 0x01; /* 0x7f is skipped in Shift_JIS trail bytes */
+     *jis1 = (unsigned char)((sjis1 - 0x89)*2 + 0x31);
+     *jis2 = (unsigned char)(sjis2 - 0x1f);
+   }
+ }
+ 
+ static void /* Shift_JIS -> 7-bit JIS with escape sequences; dest is not bounds-checked */
+ stoj(unsigned char *dest, const unsigned char *src)
+ {
+   register int stat; /* 0 single-byte, 1 kanji 2nd byte due, 2 inside a kanji run, 3 inside a kana run */
+   register unsigned char c,c1,c2;
+   unsigned char s1,s2;
+ 
+   int i,pos;
+ 
+   if(src==NULL || !strlen(src)){ /* NULL or empty input gives an empty string */
+     dest[0]='\0';
+     return;
+   }
+ 
+   i=0;
+   pos=0;
+   stat = 0;
+   while ((c = src[i++]) != '\0') {
+     switch (stat) {
+     case 0:
+       if((c>=0x81 && c<=0x9f) || (c>=0xe0 && c<=0xfc)) { /* first byte of a two-byte character */
+ 	c1 = c;	           
+ 	stat = 1;
+ 	dest[pos++]=ESC; /* put KANJI-IN */
+ 	dest[pos++]=KI1;
+ 	dest[pos++]=KI3;
+       }
+       else if((0xa1 <= c) && (c <= 0xdf)) { /* hankaku kana */
+ 	dest[pos++]=KANAIN; /* open a kana run */
+ 	c &= 0x7f; /* kana is written as its 7-bit code */
+ 	dest[pos++]=c;
+ 	stat = 3;
+       }
+       else
+ 	dest[pos++]=c; /* put ASCII code */
+       break;
+     case 1:
+       stat = 2;
+       c2 = c;
+       SJIS_JIS(c1,c2,&s1,&s2);  /* change shift-JIS into JIS */
+       dest[pos++]=s1; /* put KANJI code */
+       dest[pos++]=s2;
+       break;
+     case 2: /* a kanji was just written: the run either continues or ends */
+       if((c>=0x81 && c<=0x9f) || (c>=0xe0 && c<=0xfc)) {
+ 	c1 = c;	           
+ 	stat = 1; /* next kanji, no new KANJI-IN needed */
+       } else {
+ 	dest[pos++]=ESC; /* put KANJI-OUT */
+ 	dest[pos++]=KO1;
+ 	dest[pos++]=KO4;
+ 	i--; /* look at this byte again in state 0 */
+ 	stat = 0;
+       }
+       break;
+     case 3:				 /* hankaku kana */
+       if((0xa1 <= c) && (c <= 0xdf)) {
+ 	c &= 0x7f;
+ 	dest[pos++]=c;
+       }
+       else {
+ 	dest[pos++]=KANAOUT; /* close the kana run */
+ 	i--; /* look at this byte again in state 0 */
+ 	stat = 0;
+       }
+       break;
+     }
+   }
+   if ( stat == 1 || stat == 2 ) { /* input ended inside a kanji run (a lone first byte is dropped) */
+     /* put KANJI-OUT */
+     dest[pos++]=ESC;
+     dest[pos++]=KO1;
+     dest[pos++]=KO4;
+   }
+   else if (stat == 3) /* input ended inside a kana run */
+     dest[pos++]=KANAOUT;
+ 
+   /* terminate */
+   dest[pos]='\0';
+ 
+ }
+ 
+ /*
+  * Kanjiconv
+  *
+  */
+ #if APACHE
+ void php_init_kanji(php_module_conf *conf)
+ #else
+ void php_init_kanji(void)
+ #endif
+ {
+   /*
+    * Choose the output kanji code.  EUC is the default; in an Apache
+    * build conf->KanjiOutput may name JIS or SJIS instead (matched
+    * without regard to case).  A missing or unknown name keeps EUC.
+    */
+   int code = EUC;
+ #if APACHE
+   const char *name = conf->KanjiOutput;
+ 
+   if(name != NULL && strcasecmp(name,"JIS") == 0)
+     code = JIS;
+   else if(name != NULL && strcasecmp(name,"SJIS") == 0)
+     code = SJIS;
+ #endif
+   output_kanji_code = code;
+ }
+ 
+ static int /* scans ahead after an ambiguous byte pair; returns JIS, SJIS or EUC (the default) */
+ _checkforward(unsigned char *p)
+ {
+   unsigned char c;
+ 
+   if(*p=='\0' || (c=*++p)=='\0') /* the byte at p is skipped as before; c used to be left unset */
+     return EUC;
+   p++; /* the loop expects p to sit on the next unread byte */
+   while(1){
+     if(c=='\033'){
+       if((c=*p++)=='\0')
+ 	break;
+       if(c=='$'){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(c=='B' || c=='@') /* ESC $ B / ESC $ @ : kanji-in */
+ 	  return JIS;
+ 	else
+ 	  continue; /* re-examine c at the top of the loop */
+       }
+       else if(c=='K')
+ 	return JIS;
+       else
+ 	continue;
+     }
+     else if(c>=0x81){
+       if(c==0x8e){
+ 	if(*p=='\0' || (*++p)=='\0') /* skip the byte after 0x8e without stepping over the terminator */
+ 	  break;
+       }
+       else if(c<=0x9f){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iskanji2nd(c))
+ 	  return SJIS;
+ 	else
+ 	  continue;
+       }
+       else if(c>=0xa1 && c<=0xdf){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iskana(c)) /* still ambiguous: keep looking */
+ 	  continue;
+ 	else if (iseuc(c))
+ 	  return EUC;
+ 	else
+ 	  continue;
+       }
+       else if(c!=0xa0)
+ 	return EUC;
+     }
+     if((c=*p++)=='\0')
+       break;
+   }
+ 
+   return EUC; /* nothing decisive was found */
+ }
+ 
+ static int /* guesses the kanji code of buf: returns NONE, EUC, JIS or SJIS */
+ _kanjicode(unsigned char *buf)
+ {
+   unsigned char *p,c;
+   int unknownstat = 0; /* bit 1: 0x8e + kana seen, bit 0: a pair valid in both EUC and SJIS seen */
+ 
+   if(buf==NULL || buf[0]=='\0')
+     return NONE;
+ 
+   p=buf;
+   c=*p++; /* from here on p always points at the next unread byte */
+   while(1){
+     if(c=='\033'){
+       if((c=*p++)=='\0')
+ 	break;
+       if(c=='$'){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(c=='B' || c=='@') /* ESC $ B / ESC $ @ : kanji-in */
+ 	  return JIS;
+ 	else
+ 	  continue; /* re-examine c at the top of the loop */
+       }
+       else if(c=='K')
+ 	return JIS;
+       else
+ 	continue;
+     }
+     else if(c>=0x81){
+       if(c==0x8e){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iskana(c))
+ 	  unknownstat |= 2;
+ 	else if (iskanji2nd(c))
+ 	  return SJIS;
+ 	else
+ 	  continue;
+       }
+       else if(c<=0x9f){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iskanji2nd(c))
+ 	  return SJIS;
+ 	else
+ 	  continue;
+       }
+       else if((c>=0xa1 && c<=0xdf) || c==0xfd || c==0xfe){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iseuc(c))
+ 	  if(iskana(c))
+ 	    if(*p=='\0' || *(p+1)=='\0') /* test *p first: *(p+1) alone read past the terminator */
+ 	      return EUC;
+ 	    else
+ 	      return _checkforward(p); /* pair fits EUC and SJIS kana: decide from what follows */
+ 	  else
+ 	    return EUC;
+ 	else
+ 	  continue;
+       }
+       else if(c>=0xe0 && c<=0xfc){
+ 	if((c=*p++)=='\0')
+ 	  break;
+ 	if(iskanji2nd(c))
+ 	  if(iseuc(c))
+ 	    unknownstat |= 1;
+ 	  else
+ 	    return SJIS;
+ 	else
+ 	  if(iseuc(c))
+ 	    return EUC;
+ 	  else
+ 	    continue;
+       }
+     }
+     if((c=*p++)=='\0')
+       break;
+   }
+ 
+   if(unknownstat==2) /* only 0x8e + kana pairs were seen */
+     return SJIS;
+   else
+     return NONE;
+ 
+ }
+ 
+ size_t /* converts src to EUC in place; returns the length of the result */
+ conv2euc(char *src,long file_size)
+ {
+   unsigned char *temp;
+ #if DEBUG
+   int i=_kanjicode((unsigned char*)src);
+   Debug("conv2euc: converted %i to EUC\n",i);
+ #endif
+   temp=emalloc(1,2*file_size+1); /* file_size+1 overflowed: an SJIS half-width kana byte becomes two EUC bytes */
+   (*function_table[EUC][_kanjicode((unsigned char*)src)])(temp,(unsigned char*)src);
+   if(strlen((char*)temp)>(size_t)file_size) temp[file_size]='\0'; /* callers give src file_size+1 bytes: cut (possibly mid-character) rather than overrun it */
+   strcpy(src,(char*)temp);
+ #if DEBUG
+   Debug(" conv2euc * 2 : src=%d temp=%d\n",(int)strlen(src),(int)strlen((char*)temp));
+   Debug(" conv2euc * 3 : converted [%s]\n",temp);
+ #endif
+   return (size_t)strlen((char*)temp);
+ }
+ 
+ char * /* returns a newly emalloc'ed JIS copy of the EUC string src */
+ conv2jis(char *src)
+ {
+   unsigned char *temp;
+   /* 2*size is not enough: a lone two-byte kanji is wrapped in a 3-byte
+      kanji-in and a 3-byte kanji-out, 8 bytes in all, so reserve 4*size */
+   temp=emalloc(1,4*strlen(src)+1);
+ 
+   /* all of kanji code in PHP is EUC ... */
+   (*function_table[JIS][EUC])(temp,(unsigned char*)src);
+ 
+   return (char*)temp;
+ }
+ 
+ void /* script function setkanjioutput: selects the output kanji code by name */
+ SetKanjiOutput(void)
+ {
+   Stack *arg = Pop();
+   const char *name;
+   if(arg == NULL){
+     Error("Stack error in KanjiConv");
+     return;
+   }
+   name = arg->strval;
+   /* names are matched without regard to case */
+   if(strcasecmp(name,"EUC") == 0){
+     output_kanji_code=EUC;
+ #if DEBUG
+     Debug("SetKanjiOutput to EUC\n");
+ #endif
+     return;
+   }
+   if(strcasecmp(name,"JIS") == 0){
+ #if DEBUG
+     Debug("SetKanjiOutput to JIS\n");
+ #endif
+     output_kanji_code=JIS;
+     return;
+   }
+   if(strcasecmp(name,"SJIS") == 0){
+ #if DEBUG
+     Debug("SetKanjiOutput to SJIS\n");
+ #endif
+     output_kanji_code=SJIS;
+     return;
+   }
+   /* any other name selects the default, EUC */
+ #if DEBUG
+   Debug("SetKanjiOutput to *DEFAULT*\n");
+ #endif
+   output_kanji_code=EUC;
+ }
+ 
+ 
+ int /* converts the EUC string str to output_kanji_code and writes it to the client or stdout */
+ kanji_puts(char *str)
+ {
+   unsigned char *ret;
+   /* 4x, not 2x: in JIS a lone 2-byte kanji is wrapped in two 3-byte escapes (8 bytes) */
+   ret=(unsigned char*)emalloc(1,4*strlen(str)+1);
+ 
+   (*function_table[output_kanji_code][EUC])(ret,str);
+ 
+ #if APACHE
+ #if APACHE_NEWAPI
+   return rputs(ret,php_rqst);
+ #else
+   return rprintf(php_rqst,"%s",ret);
+ #endif
+ #else
+   return fputs(ret,stdout);
+ #endif
+ 
+ }
+ 
+ int /* converts the EUC string str to output_kanji_code and writes it to fp */
+ kanji_fputs(char *str,FILE *fp)
+ {
+   /* 4x, not 2x: in JIS a lone 2-byte kanji is wrapped in two 3-byte escapes (8 bytes) */
+   unsigned char *ret;
+ 
+   ret=(unsigned char*)emalloc(1,4*strlen(str)+1);
+ 
+   (*function_table[output_kanji_code][EUC])(ret,str);
+ 
+   return fputs(ret,fp);
+ }
+ 
diff -crP php-2.0.1/src/lex.c php-2.0.1.jp_urat-5.3/src/lex.c
*** php-2.0.1/src/lex.c	Sat Oct 25 23:32:30 1997
--- php-2.0.1.jp_urat-5.3/src/lex.c	Thu Mar 26 03:09:04 1998
***************
*** 281,286 ****
--- 281,288 ----
  #ifdef HAVE_ODBC
  	  { "sqlfetch", INTFUNC1,ODBCfetch },
  #endif	  	 
+ 	  { "mbsubstr", INTFUNC3,MBSubStr },
+ 	  { "mbstrlen", INTFUNC1,MBStrLen },
  	  { NULL,0,NULL } },
  
  	{ { "endswitch", ENDSWITCH,NULL }, /* 9 */
***************
*** 376,381 ****
--- 378,384 ----
  	  { "sybsql_exit",INTFUNC0,SybsqlExit},
  	  { "mi_fieldnum", INTFUNC3,MIfieldNum },
  	  { "phpshowpool",INTFUNC0,ShowPool },
+       { "pg_connect2", INTFUNC6,PGconnect2 },
  #if HAVE_LIBADABAS
  	  { "ada_numrows", INTFUNC1,Ada_numRows },
  	  { "ada_connect", INTFUNC3,Ada_connect },
***************
*** 469,474 ****
--- 472,478 ----
  #if HAVE_LIBADABAS
  	  { "ada_freeresult", INTFUNC1,Ada_freeResult },
  #endif
+ 	  { "setkanjioutput", INTFUNC1, SetKanjiOutput },
  	  { NULL,0,NULL } },
  
  	{ { "msql_freeresult", INTFUNC1,MsqlFreeResult }, /* 15 */
***************
*** 1471,1476 ****
--- 1475,1481 ----
  void ParserInit(int fd, long file_size, int nh, char *fbuf) {
  	no_httpd = nh;
  
+ 	gsize = file_size;
  	if(fd!=-1) {
  #ifdef PHP_HAVE_MMAP
  #if DEBUG
***************
*** 1485,1490 ****
--- 1490,1497 ----
  		fp = fdopen(fd,"r");
  		pa = emalloc(0,file_size + 1);
  		fread(pa,1,file_size,fp);
+ 		pa[file_size] = '\0';
+ 		gsize = (long)conv2euc(pa, file_size);
  		pa_pos = 0L;
  		fclose(fp);
  		}
***************
*** 1500,1506 ****
  		pa_pos = 0L;
  		gfd=-1;
  	}
- 	gsize = file_size;
  	inpos = -1;
  	tokenmarker=0;
  	yylex_linenumber = 0;
--- 1507,1512 ----
diff -crP php-2.0.1/src/log.c php-2.0.1.jp_urat-5.3/src/log.c
*** php-2.0.1/src/log.c	Sat Aug 16 12:51:40 1997
--- php-2.0.1.jp_urat-5.3/src/log.c	Thu Mar 26 03:16:54 1998
***************
*** 120,125 ****
--- 120,126 ----
  
  char *filename_to_logfn(char *filename) {
  	char *lfn, *lp, *ret;
+ 	int i;
  
  	if (forcelogfile) { filename = forcelogfile; }
  	lfn = estrdup(1,filename);
***************
*** 132,138 ****
--- 133,144 ----
  			lp++;
  		}
  	}
+ #if 0
  	lp = _RegReplace("/","_",lp);
+ #else
+ 	for (i=0, lfn=lp; *lfn && i<PATH_MAX; lfn++, i++)
+                 if (*lfn == '/') *lfn = '_';
+ #endif
  	ret = estrdup(1,lp);
  	return(ret);
  }
diff -crP php-2.0.1/src/mail.c php-2.0.1.jp_urat-5.3/src/mail.c
*** php-2.0.1/src/mail.c	Fri Apr 18 20:02:23 1997
--- php-2.0.1.jp_urat-5.3/src/mail.c	Wed Feb 18 21:03:42 1998
***************
*** 98,109 ****
  
      sendmail = popen(SENDMAIL, "w");
  	if (sendmail) {
! 		fprintf(sendmail, "To: %s\n", to);
! 		fprintf(sendmail, "Subject: %s\n", subject);
  		if (headers != NULL) {
  			fprintf(sendmail, "%s\n", headers);
  		}
! 		fprintf(sendmail, "\n%s\n.\n", message);
          sprintf(temp, "%d", pclose(sendmail));
  	    Push(temp,LNUMBER);
  	}
--- 98,109 ----
  
      sendmail = popen(SENDMAIL, "w");
  	if (sendmail) {
! 		fprintf(sendmail, "To: %s\n", conv2jis(to));
! 		fprintf(sendmail, "Subject: %s\n", conv2jis(subject));
  		if (headers != NULL) {
  			fprintf(sendmail, "%s\n", headers);
  		}
! 		fprintf(sendmail, "\n%s\n.\n", conv2jis(message));
          sprintf(temp, "%d", pclose(sendmail));
  	    Push(temp,LNUMBER);
  	}
diff -crP php-2.0.1/src/main.c php-2.0.1.jp_urat-5.3/src/main.c
*** php-2.0.1/src/main.c	Tue Oct 14 19:13:44 1997
--- php-2.0.1.jp_urat-5.3/src/main.c	Wed Feb 18 21:03:42 1998
***************
*** 118,123 ****
--- 118,124 ----
  #ifdef HAVE_LIBADABAS
  	php_init_adabas(NULL, NULL, NULL);
  #endif /*HAVE_LIBADABAS*/
+ 	php_init_kanji();
  			
  
  /* 
***************
*** 323,328 ****
--- 324,330 ----
  #if HAVE_LIBOCIC
  	php_init_oracle();
  #endif
+ 	php_init_kanji(conf);
  	
  /* 
  	Check to see if there are any special HTTP headers 
diff -crP php-2.0.1/src/mb.c php-2.0.1.jp_urat-5.3/src/mb.c
*** php-2.0.1/src/mb.c	Thu Jan  1 09:00:00 1970
--- php-2.0.1.jp_urat-5.3/src/mb.c	Wed Feb 18 21:03:42 1998
***************
*** 0 ****
--- 1,85 ----
+ #include <stdlib.h>
+ #include <string.h>
+ #include <ctype.h>
+ #include "php.h"
+ #include "parse.h"
+ 
+ static int /* counts the characters (not bytes) of the EUC string str */
+ _mbstrlen(unsigned char* str)
+ {
+   int i, ret;
+   for(i=0, ret=0;str[i]!='\0';i++){ /* was i<strlen(str), a full scan per step */
+     if((iseuc(str[i]) || str[i]==0x8e) && str[i+1]!='\0')
+       i++; /* two-byte character: ZENKAKU, or SS2 (0x8e) + half-width kana */
+     ret++;
+   }
+   return ret;
+ }
+ 
+ void /* script function mbstrlen: pushes the character count of its argument */
+ MBStrLen(void) {
+   char digits[32];
+   Stack *arg = Pop();
+   if(arg == NULL) {
+     Error("Stack Error in MBstrlen function");
+     return;
+   }
+   /* the count goes back on the stack as a decimal LNUMBER */
+   sprintf(digits, "%d", _mbstrlen((unsigned char*)arg->strval));
+   Push(digits, LNUMBER);
+ }
+ 
+ void /* script function mbsubstr: pushes n characters of an EUC string starting at character m */
+ MBSubStr(void)
+ {
+   Stack *s;
+   int m,n,i,j,p;
+   unsigned char *str;
+ 
+   s = Pop(); /* arguments are popped in the order n, m, string */
+   if(!s) {
+     Error("Stack Error in substr function");
+     return;
+   }
+   n = s->intval;
+ 
+   s = Pop();
+   if(!s) {
+     Error("Stack Error in substr function");
+     return;
+   }
+   m = s->intval;
+   
+   s = Pop();
+   if(!s) {
+     Error("Stack Error in substr function");
+     return;
+   }
+ 
+   /*
+    * Work on a private copy and walk it once, counting characters.
+    *
+    * A character is two bytes when it starts with an EUC kanji byte or
+    * with SS2 (0x8e, the half-width kana prefix) AND a second byte
+    * really follows.  Stepping two bytes unconditionally could move
+    * past the terminator after a dangling first byte, and the final
+    * str[i]='\0' then wrote outside the copy.
+    *
+    * Results:
+    *   - m at or past the end of the string gives "" (m equal to the
+    *     length with n of 0 used to return the whole string);
+    *   - fewer than n characters left gives the rest of the string;
+    *   - a negative m counts as 0 and a negative n as 0.
+    */
+   str = (unsigned char *)estrdup(1,s->strval);
+ 
+   for(i=0,p=0; str[i]!='\0' && p<m; p++) /* skip the first m characters */
+     i += ((iseuc(str[i]) || str[i]==0x8e) && str[i+1]!='\0') ? 2 : 1;
+   j = i; /* byte offset of character m, or of the terminator */
+ 
+   for(p=0; str[i]!='\0' && p<n; p++) /* keep at most n characters */
+     i += ((iseuc(str[i]) || str[i]==0x8e) && str[i+1]!='\0') ? 2 : 1;
+   str[i]='\0';
+   Push(&str[j], STRING);
+ }
+ 
diff -crP php-2.0.1/src/mod_php.c php-2.0.1.jp_urat-5.3/src/mod_php.c
*** php-2.0.1/src/mod_php.c	Sat Nov 15 16:35:56 1997
--- php-2.0.1.jp_urat-5.3/src/mod_php.c	Wed Feb 18 21:03:43 1998
***************
*** 137,142 ****
--- 137,143 ----
  	new->AdaUser = NULL;
  	new->AdaPW = NULL;
  	new->AdaDB = NULL;
+ 	new->KanjiOutput=NULL;
  	return new;
  }
  
***************
*** 214,219 ****
--- 215,223 ----
  	case 11:
  		conf->AdaPW = pstrdup(cmd->pool,arg);
  		break;
+ 	case 12:
+ 		conf->KanjiOutput = pstrdup(cmd->pool,arg);
+ 		break;
  	}
  	return NULL;
  }
***************
*** 255,260 ****
--- 259,265 ----
  	{ "phpAdaDefDB",phptake1handler,(void *)9,OR_OPTIONS,TAKE1,"database" },
  	{ "phpAdaDefUser",phptake1handler,(void *)10,OR_OPTIONS,TAKE1,"user" },
  	{ "phpAdaDefPW",phptake1handler,(void *)11,OR_OPTIONS,TAKE1,"password" },
+ 	{ "phpKanjiOutput",phptake1handler,(void *)12,OR_OPTIONS,TAKE1,"kanji-mode name" },
  	{ NULL }
  };
  
diff -crP php-2.0.1/src/mod_php.h php-2.0.1.jp_urat-5.3/src/mod_php.h
*** php-2.0.1/src/mod_php.h	Sun Sep 14 01:14:29 1997
--- php-2.0.1.jp_urat-5.3/src/mod_php.h	Wed Feb 18 21:03:43 1998
***************
*** 36,41 ****
--- 36,42 ----
  	int MaxDataSpace;
  	int Debug;
  	int engine;
+ 	char *KanjiOutput;
  	int LastModified;
  	char *AdaUser;
  	char *AdaPW;
diff -crP php-2.0.1/src/pg95.c php-2.0.1.jp_urat-5.3/src/pg95.c
*** php-2.0.1/src/pg95.c	Fri Jun 13 03:59:40 1997
--- php-2.0.1.jp_urat-5.3/src/pg95.c	Thu Mar 26 03:06:31 1998
***************
*** 721,726 ****
--- 721,934 ----
  #endif
  }
  
+ void PGconnect2(void) { /* pg_connect2: password-authenticated connect; pushes a connection index or "0" */
+ #ifdef HAVE_LIBPQ
+   Stack		*s;
+   PGconn		*new_conn;
+   char		*host=NULL;
+   char		*port=NULL;
+   char		*options=NULL;
+   char		*tty=NULL;
+   char		*db=NULL;
+   char		*user=NULL;
+   char		*passwd=NULL;
+   char		*temp;
+   int		j;
+   int		len = 0; /* bytes needed for the conninfo string, without the NUL */
+   char		*connect_string;
+ 
+   s = Pop(); /* popped first: the "user/passwd" pair */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     user = estrdup(1,s->strval);
+     if (!(temp = strchr(user,(int)'/'))) {
+       Error("No user/passwd delimiter / found in pg_connect2");
+       Push("0", LNUMBER);
+       return;
+     }
+     *temp = '\0'; /* split the copy at the first '/' */
+     len += strlen(user) + strlen("user=") + 1;
+ 
+     if (strlen(temp+1) == 0) {
+       Error("No passwd supplied in pg_connect2");
+       Push("0", LNUMBER);
+       return;
+     }
+     passwd = estrdup(1,temp+1);
+     len += strlen(passwd) + strlen("password=") + 1;
+   }
+ 
+   else {
+     Error("No user/passwd pair supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   s = Pop(); /* database name */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     db = estrdup(1,s->strval);
+     len += strlen(db) + strlen("dbname=") + 1;
+   }
+   else {
+     Error("No database name supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   s = Pop(); /* tty; an empty string leaves it out */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     if (strlen(s->strval)) {
+       tty = estrdup(1,s->strval);
+       len += strlen(tty) + strlen("tty=") + 1;
+     }
+   }
+   else {
+     Error("No tty name supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   s = Pop(); /* options; an empty string leaves them out */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     if (strlen(s->strval)) {
+       options = estrdup(1,s->strval);
+       len += strlen(options) + strlen("options=") + 1;
+     }
+   }
+   else {
+     Error("No options string supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   s = Pop(); /* port; an empty string leaves it out */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     if (strlen(s->strval)) {
+       port = estrdup(1,s->strval);
+       len += strlen(port) + strlen("port=") + 1;
+     }
+   }
+   else {
+     Error("No port number supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   s = Pop(); /* host; an empty string leaves it out */
+   if (!s) {
+     Error("Stack error in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+   if (s->strval) {
+     if (strlen(s->strval)) {
+       host = estrdup(1,s->strval);
+       len += strlen(host) + strlen("host=") + 1;
+     }
+   }
+   else {
+     Error("No host name/address supplied in pg_connect2");
+     Push("0", LNUMBER);
+     return;
+   }
+ 
+   len += strlen("authtype=password");
+ 
+   connect_string = emalloc(1,len+1);
+   *connect_string = '\0';
+ 
+   if (host){
+     strcat(connect_string,"host=");
+     strcat(connect_string,host);
+     strcat(connect_string," ");
+   }
+   if (port){
+     strcat(connect_string,"port=");
+     strcat(connect_string,port);
+     strcat(connect_string," ");
+   }
+   if (options){
+     strcat(connect_string,"options=");
+     strcat(connect_string,options);
+     strcat(connect_string," ");
+   }
+   if (tty){
+     strcat(connect_string,"tty=");
+     strcat(connect_string,tty);
+     strcat(connect_string," ");
+   }
+   if (db){
+     strcat(connect_string,"dbname=");
+     strcat(connect_string,db);
+     strcat(connect_string," ");
+   }
+   if (user){
+     strcat(connect_string,"user=");
+     strcat(connect_string,user);
+     strcat(connect_string," ");
+   }
+   if (passwd){
+     strcat(connect_string,"password=");
+     strcat(connect_string,passwd);
+     strcat(connect_string," ");
+   }
+   /* host= is not appended a second time here: the repeat was not counted
+      in len (heap overrun) and passed NULL to strcat for an empty host.
+      NOTE(review): values are not quoted, so one containing a space would split the conninfo string. */
+   strcat(connect_string,"authtype=password");
+ 
+   new_conn = PQconnectdb(connect_string);
+   if ((new_conn == NULL) || (PQstatus(new_conn) == CONNECTION_BAD)) {
+     Error("Could not connect to database (%s)", PQerrorMessage(new_conn));
+     if (new_conn) PQfinish(new_conn); /* a failed PGconn still has to be released */
+     temp = "0";
+   }
+   else
+     {
+       j = pg_add_conn(new_conn);
+       temp = (char*) emalloc(1,32); /* (j%10)+3 was too small once j had four or more digits */
+       sprintf(temp, "%d", j);
+       
+       /* get and cache the type table */
+       if (pgTypeRes == 0) 
+ 	pg_type(new_conn, -2);
+     }
+   Push(temp, LNUMBER);
+ #else
+   Pop();
+   Pop();
+   Pop();
+   Pop();
+   Pop();
+   Pop();
+   Error("No postgres95 support");
+ #endif
+ }
+ 
  void PGclose(void) {
  #ifdef HAVE_LIBPQ
  	Stack		*s;
diff -crP php-2.0.1/src/php.h php-2.0.1.jp_urat-5.3/src/php.h
*** php-2.0.1/src/php.h	Sat Jan 10 03:57:57 1998
--- php-2.0.1.jp_urat-5.3/src/php.h	Thu Mar 26 03:06:32 1998
***************
*** 73,83 ****
--- 73,87 ----
  #if HAVE_MEMORY_H
  #include <memory.h>
  #endif
+ #ifdef MB
+ #include "regex.h"
+ #else
  #if HAVE_REGCOMP
  #include <regex.h>
  #else
  #include "regex.h"
  #endif
+ #endif
  #if PHPFASTCGI
  #include "fcgi_stdio.h"
  #endif
***************
*** 899,904 ****
--- 903,909 ----
  void PGexec(void);
  void PG_result(void);
  void PGconnect(void);
+ void PGconnect2(void);
  void PGclose(void);
  void PGnumRows(void);
  void PGnumFields(void);
***************
*** 1298,1303 ****
--- 1303,1344 ----
  void Ora_Open(void);
  void Ora_Parse(int);
  void Ora_Rollback(void);
+ 
+ /* kanjiconv.c: kanji code identifiers, also used as function_table indices */
+ #define NONE 0
+ #define EUC  1
+ #define JIS  2
+ #define SJIS 3
+ 
+ typedef void (*FUNC)(); /* converter: (unsigned char *dest, const unsigned char *src) */
+ 
+ #define iskanji1st(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
+ #define iskanji2nd(c) ((c) >= 0x40 && (c) <= 0xfc && (c) != 0x7f)
+ #define iseuc(c) ((c) >= 0xa1 && (c) <= 0xfe)
+ #define isjis(c) ((c) >= 0x21 && (c) <= 0x7f)
+ #define iskana(c) ((c) >= 0xa0 && (c) <= 0xdf)
+ 
+ extern int output_kanji_code;
+ 
+ void SetKanjiOutput(void);
+ size_t conv2euc(char *,long);
+ char *conv2jis(char *);
+ 
+ int kanji_fputs(char*,FILE*);
+ int kanji_puts(char*);
+ #undef PUTS
+ #define PUTS(a) kanji_puts(a)
+ /* PUTC will be ok w/o conversion */
+ 
+ #if APACHE
+ void php_init_kanji(php_module_conf *);
+ #else
+ void php_init_kanji(void);
+ #endif
+ 
+ /* mb.c */
+ void MBStrLen(void);
+ void MBSubStr(void);
  void Ora_Bind(void);
  void php_init_oracle(void);
  void OraCloseAll(void);
diff -crP php-2.0.1/src/post.c php-2.0.1.jp_urat-5.3/src/post.c
*** php-2.0.1/src/post.c	Wed May 21 22:14:18 1997
--- php-2.0.1.jp_urat-5.3/src/post.c	Wed Feb 18 21:03:43 1998
***************
*** 319,324 ****
--- 319,325 ----
  				}
  			}
  			parse_url(t+1);
+ 			(void)conv2euc(t+1,strlen(t+1));
  			tmp = estrdup(1,t+1);
  #if DEBUG
  			Debug("TreatData: setting $%s=%s (%d,%d)\n",s,tmp,itype,inc);