Regexp::Assemble

e.g.

  use Regexp::Assemble;

  my $ra = Regexp::Assemble->new;
  $ra->add( 'ab+c' );
  $ra->add( 'ab+-' );
  $ra->add( 'a\w\d+' );
  $ra->add( 'a\d+' );
  print $ra->re; # prints a(?:\w?\d+|b+[-c])


"Regexp::Assemble"
"R::A"
"ab(cd(ef)?gh)ij"
"R::A"
"\"
"["
"("
"?"
"+"
"*"
"{"
    $re->add( '\d+-\d+-\d+-\d+\.example\.com' );
    $re->add( <IN> );


"while"
    $re->add($_) while <IN>;


"add"
"chomp"
    $re->chomp(0);
    $re->add($_) while <IN>;


  $r->add_file( 'file.1', 'file.2' );


"eval"
chomp(0)
"input_record_separator"
$/
"add_file"
"local"
$/
  $r->add_file({file => ['file.1', 'file.2', 'file.3']});
  $r->add_file({file => 'file.n'});


  $r->add_file({file => 'data.txt', input_record_separator => ':' });


  $r->add_file( {
    file => [ 'pattern.txt', 'more.txt' ],
    input_record_separator  => "\r\n",
  });


  my $re = $main->clone->re();
  $main->add( 'another-pattern-\\d+' );


"ab+c?d*e"
"('a', 'b+', 'c?', 'd*', 'e')"
  my $ra = Regexp::Assemble->new
    ->insert( qw[ a b+ c? d* e ] )
    ->insert( qw[ a c+ d+ e* f ] );


"insert"
"lexstr"
"join"
  my @token = $re->lexstr($pattern);
  my $new_pattern = join( '', @token );


"\x4b"
"\Q"
"\E"
"lexstr"
"undef"
  $re->pre_filter( sub { $_[0] =~ s/\s*#.*$//; 1 } );


  $re->pre_filter( sub { length(shift) } );


"undef"
  $ra->pre_filter(undef);


  $ra->filter(sub { not grep { / / } @_ });


  sub only_spaces_and_digits {
    not grep { /[^\d ]/ } @_
  }
  $ra->filter( \&only_spaces_and_digits );


"warn"
"die"
"undef"
  $ra->filter(undef);


"(?:"
")"
  egrep -v '^ *[()]' <regexp.file>


"m/.../x"
"indent"
"qr//"
"as_string"
"as_string"
"Regexp::Assemble"
  my $re = Regexp::Assemble->new
    ->add( q[ab+cd+e] )
    ->add( q[ac\\d+e] )
    ->add( q[c\\d+e] )
    ->re;


$re
  while( <> ) {
    /$re/ and print "Something in [$_] matched\n";
  }


"re"
"m//"
  my $re = Regexp::Assemble->new->add( qw[ fee fie foe fum ] );
  while( <IN> ) {
    if( /($re)/ ) {
      print "Here be giants: $1\n";
    }
  }


"match"
"matched"
"use re 'eval'"
    my $did_match = do { use re 'eval'; $target =~ /$ra/ };
    if( $did_match ) {
        print "matched ", $ra->matched, "\n";
    }


$^R
$^R
"use re 'eval'"
"Regexp::Assemble"
"/^(?{system 'rm -rf /'})/"
$^R
"/$re/"
"match"
   if( defined( my $match = $ra->match($_)) ) {
       print "  $_ matched by $match\n";
   }


"matched"
"match"
"matched"
"foo(bar)rat"
"mbegin"
"mend"
"mvar"
"capture"
$^R
"re 'eval'"
"Regexp::Assemble"
"^"
"\b"
"\Z"
"imsx"
"chomp"
"file"
"add_file"
  my $r = Regexp::Assemble->new(file => 're.list');

  my $r = Regexp::Assemble->new(file => ['re.1', 're.2']);


"chomp"
"input_record_separator"
"file"
"add_file"
$/
"matched"
"as_string"
"x+"
"x"
"x*"
"a(?:bc+d|ec+d)"
"a[be]c+d"
  my $ra = Regexp::Assemble->new;
  my $rb = Regexp::Assemble->new( chomp => 1, debug => 3 );


"Regexp::Assemble"
$^R
"undef"
  my $r = Regexp::Assemble->new->track(1)->add(qw(foo? bar{2} [Rr]at));

  for my $w (qw(this food is rather barren)) {
    if ($w =~ /$r/) {
      print "$w matched by ", $r->source($^R), $/;
    }
    else {
      print "$w no match\n";
    }
  }


"@-"
"mvar"
"@+"
"mvar"
mvar(1)
$1
mvar(2)
$2
mvar(0)
"@-"
"@+"
"mvar"
"capture"
"mvar"
"$1, $2, $3, ..."
"undef"
"for my $c ($re->capture) { ..."
"track"
"track"
"(?{...})"
"add"
"insert"
"a\-b"
"a-b"
"ab"
"ab"
"abc\,def"
"\,"
","
"\Qa.b\E"
"a\.b"
"as_string"
"re"
"(?-xism..."
  $r->dup_warn();


  $r->dup_warn(
    sub {
      my $self = shift;
      print $self->stats_add, " patterns added at line $.\n",
          join( '', @_ ), " added previously\n";
    }
  );


"^"
"$"
  $r->add(qw(^this ^that ^them))->as_string;

  $r->add(qw(this that them))->anchor_line_begin->as_string;

  # both techniques will produce ^th(?:at|em|is)


"\b"
"^"
"$"
"\A"
"\Z"
"\z"
"anchor_NAME"
"anchor_NAME_begin"
"anchor_NAME_end"
"anchor_word_begin"
"anchor_line_begin"
"anchor_word_begin"
"\b"
  $r->add('pre')->anchor_word_begin->as_string;
  # produces '\bpre'


"\b"
  $r->add(qw(ing tion))
    ->anchor_word_end
    ->as_string; # produces '(?:tion|ing)\b'


"\b"
  $r->add(qw(cat carrot))
    ->anchor_word(1)
    ->as_string; # produces '\bca(?:rro)?t\b'


"^"
  $r->anchor_line_begin;
  # or
  $r->anchor_line_begin(1);


"$"
  # turn it off
  $r->anchor_line_end(0);


"^"
"$"
  $r->add(qw(cat carrot))
    ->anchor_line
    ->as_string; # produces '^ca(?:rro)?t$'


"\A"
  $r->anchor_string_begin(1);


"\Z"
  # disable the string boundary end anchor
  $r->anchor_string_end(0);


"\z"
  # disable the string boundary absolute end anchor
  $r->anchor_string_end_absolute(0);


"\Z"
"\z"
"\A"
"\Z"
  $r->add(qw(cat carrot))
    ->anchor_string
    ->as_string; # produces '\Aca(?:rro)?t\Z'


"\A"
"\z"
  $r->add(qw(cat carrot))
    ->anchor_string_absolute
    ->as_string; # produces '\Aca(?:rro)?t\z'


"add"
"as_string"
"re"
  # load=<num>


  # reduce=<num>


"load-epoch"
"reduce-epoch"
  $r->debug(7)->add( '\\d+abc' );


"debug"
  print $r->dump;


"add_file"
"add("\\$/")"
"add()"
"chomp"
"chomp"
  $re->chomp(0); # really want the record separators
  $re->add(<DATA>);


"\s"
"\S"
"\w"
"\W"
"\d"
"\D"
"."
"/s"
"\n"
"/s"
  $re->add( '\\w', '\\W' );
  my $clone = $re->clone;

  $clone->fold_meta_pairs(0);
  print $clone->as_string; # prints '[\W\w]'
  print $re->as_string;    # prints '.'


"as_string"
  $re->indent( 4 );
  print $re->as_string;


"imsx"
"flags"
"Regexp::List"
"(?{...})"
  $re->track( 1 );
  if( $target =~ /$re/ ) {
    print "$target matched by ", $re->matched, "\n";
  }


"brag|tag"
"(?:br|t)ag"
"dig|dim"
"di[gm]"
"a+"
"a"
"a*"
"b+?"
"b"
"b*?"
"a"
"eg/naive"
"/sl(?:ip|op|ap)/"
"/sl[aio]p/"
"Regexp::Optimizer"
"clone"
"add"
"insert"
"debug"
"lex"
"reduce"
"Default_Lexer"
"Default_Lexer"
"Regexp::Assemble"
    Regexp::Assemble::Default_Lexer( '\\d' );


  "Cannot pass a C<refname> to Default_Lexer"


"$obj->Default_Lexer"
"Regexp::Assemble::Default_Lexer"
  "filter method not passed a coderef"

  "pre_filter method not passed a coderef"


"filter"
  "duplicate pattern added: /.../"


"dup_warn"
  "cannot open [file] for input: [reason]"


"add_file"
"add('a\\d+b')"
"add('a\d+b')"
"X(?:-\d+){2}Y"
"X-\d+-\d+Y"
"X-\d+Z"
"X(?:(?:-\d+){2}Y|-\d+Z)"
"X-\d+(?:-\d+Y|Z)"
"{2}"
"-\d+"
"-\d+Y"
"Z"
"Regexp::Assemble"
"a-b"
"axb"
"a\db"
"a[-\dx]b"
"-"
"^"
"X\d"
"X5"
5
"\d"
"X\d"
"."
"\d"
"\s"
"\W"
"\d"
"\D"
"\s"
"\S"
"\w"
"\W"
"."
"\d"
"\w"
"\D"
"\W"
"Regexp::Assemble"
"quotemeta"
"."
"$"
"\Q...\E"
"quotemeta"
"\Q"
"\E"
"\U...\E"
"\L...\E"
"lexstr"
  $pattern = join( '', @{$re->lexstr($pattern)} );


"Regexp::Assemble"
"\d"
"horse|bird|dog"
"bad"
"bit"
"few"
"fig"
"fun"
"(?:f(?:ew|ig|un)|b(?:ad|it))"
"pale"
"palm"
"pal[em]"
'e'
'm'
"pal(?:e|m)"
"dogfood"
"seafood"
"(?:dog|sea)food"
"(?:dogfood|seafood)"
"match"
"use re 'eval'"
"Regex::PreSuf"
"Regexp::Optimizer"
"Regexp::Assemble"
"Regexp::Trie"
"Regexp::Optimizer"
"Regexp::Assemble"
"Text::Trie"
"Tree::Trie"
"Regexp::Assemble"
"R::A"
"/cabababc/"
"/c(?:ab){3}c/"
"Regexp::Assemble"
"X\d"
"X\d+"
"\d"
"\d+"
"X(?:\d|\d+)"
"Z"
"Z\d+"
"Z\d*"
"Z(?:\d+)?"
"remove"
"clone"
"Regexp::Assemble"
"(?>...)"
"\d"
"Regexp::Assemble"
"a\(bc)"
  my $pattern = $assembler->reduce(0)->re;


  perl -MRegexp::Assemble -le 'print $Regexp::Assemble::VERSION'
  perl -V


"Regexp::Assemble"
  perl -le 'print ref qr//'


  http://www.landgren.net/perl/


$r
      while(/(?!\+)(\S{2,}?)(\1+)/g) { ... $1, $2 ... }

    as a starting point.

Regexp::Assemble

NAME

SYNOPSIS

DESCRIPTION

Methods

add(LIST)

add_file(FILENAME [...])

clone()

insert(LIST)

lexstr

pre_filter(CODE)

filter(CODE)

as_string

re

match(SCALAR)

new()

source()

mbegin()

mend()

mvar(NUMBER)

capture

matched()

Statistics/Reporting routines

stats_add

stats_dup

stats_raw()

stats_cooked()

stats_length()

dup_warn(NUMBER|CODEREF)

Anchor routines

anchor_word_begin

anchor_word_end

anchor_word

anchor_line_begin

anchor_line_end

anchor_line

anchor_string_begin

anchor_string_end

anchor_string_end_absolute

anchor_string

anchor_string_absolute

debug(NUMBER)

dump()

chomp(0|1)

fold_meta_pairs(NUMBER)

indent(NUMBER)

lookahead(0|1)

flags(STRING)

modifiers(STRING)

track(0|1)

unroll_plus(0|1)

lex(SCALAR)

reduce(0|1)

mutable(0|1)

reset()

Default_Lexer

DIAGNOSTICS

NOTES

SEE ALSO

See Also

LIMITATIONS

BUGS

ACKNOWLEDGEMENTS

Machine-Readable Change Log

AUTHOR

Repository

TODO

LICENSE

Index