#!/usr/bin/ruby -w
#!ruby -w
# vim: set filetype=ruby : set sw=2

# An extended grep, with extended functionality including full regular
# expressions, contextual output, highlighting, detection and exclusion of
# nontext files, and complex matching criteria.

# $Id: glark.rb,v 1.2 2006/08/29 22:28:33 jeugenepace Exp $

require "English"
require 'singleton'

# Turn off Ruby-level buffering on both standard streams, so that each write
# is passed on immediately.
$stdout.sync = true             # unbuffer
$stderr.sync = true             # unbuffer

# Program name and version, kept in globals.
$PACKAGE = "glark"
$VERSION = "1.7.10"


module Text

  # Base class for highlighters. A highlighter is built from a list of color
  # and decoration names; the original notes mention ANSI terminal codes or
  # HTML as targets, of which only the ANSI subclass is defined here.
  #
  # Note that the foreground and background sections can have modifiers
  # (attributes).
  # 
  # Examples:
  #     black
  #     blue on white
  #     bold green on yellow
  #     underscore bold magenta on cyan
  #     underscore red on cyan
  
  class Highlighter

    VERSION = "1.0.4"

    COLORS      = %w{ black red green yellow blue magenta cyan white }
    DECORATIONS = %w{ none reset bold underscore underline blink reverse }

    # Matches one element of a color description. Each String#scan result is
    # a two-element array: [background, nil] for "on <color>" / "on_<color>",
    # or [nil, foreground-or-decoration] otherwise.
    COLORS_RE = Regexp.new('(?: ' + 
                                # background will be in capture 0
                                'on(?:\s+|_) ( ' + COLORS.join(' | ') + ' ) | ' +
                                # foreground will be in capture 1
                                '( ' + (COLORS + DECORATIONS).join(' | ') + ' ) ' +
                            ')', Regexp::EXTENDED)

    DEFAULT_COLORS = [
      "black on yellow",
      "black on green",
      "black on magenta",
      "yellow on black",
      "magenta on black",
      "green on black",
      "cyan on black",
      "blue on yellow",
      "blue on magenta",
      "blue on green",
      "blue on cyan",
      "yellow on blue",
      "magenta on blue",
      "green on blue",
      "cyan on blue",
    ]
    
    # Splits a description such as "bold red on white" into attribute names:
    # ["bold", "red", "on_white"]. Backgrounds are returned with an "on_"
    # prefix; unrecognized words are skipped.
    def self.parse_colors(str)
      str.scan(COLORS_RE).collect do |background, foreground|
        background ? "on_" + background : foreground
      end
    end

    # colors: array of attribute names, as returned by parse_colors.
    def initialize(colors)
      @colors = colors
    end

    # Wraps str in whatever +pre+ and +post+ return. Neither is defined in
    # this class, so this version only works in a subclass that supplies them.
    def highlight(str)
      pre + str + post
    end

    # The attribute names joined by spaces; an empty string when no colors
    # were given. (Previously a nil color list raised NoMethodError, because
    # the fallback was a String, which has no join.)
    def to_s
      (@colors || []).join(' ')
    end

  end

  # Highlights using ANSI escape sequences.

  class ANSIHighlighter < Highlighter

    # Attribute name => numeric code used inside the escape sequence.
    ATTRIBUTES = Hash[
      'none'       => '0', 
      'reset'      => '0',
      'bold'       => '1',
      'underscore' => '4',
      'underline'  => '4',
      'blink'      => '5',
      'reverse'    => '7',
      'concealed'  => '8',
      'black'      => '30',
      'red'        => '31',
      'green'      => '32',
      'yellow'     => '33',
      'blue'       => '34',
      'magenta'    => '35',
      'cyan'       => '36',
      'white'      => '37',
      'on_black'   => '40',
      'on_red'     => '41',
      'on_green'   => '42',
      'on_yellow'  => '43',
      'on_blue'    => '44',
      'on_magenta' => '45',
      'on_cyan'    => '46',
      'on_white'   => '47',
    ]

    RESET = "\e[0m"

    # Builds a highlighter from a description such as "bold red on white".
    def self.make(str)
      colors = parse_colors(str)
      ANSIHighlighter.new(colors)
    end

    def initialize(colors)
      super
      @code = nil
      @reset = RESET
    end

    # Returns the escape sequence for the given name.

    def name_to_code(nm)
      "\e[#{ATTRIBUTES[nm]}m"
    end

    # Returns str preceded by the escape sequences for all of this
    # highlighter's attributes, and followed by the reset sequence. The
    # leading sequence is computed once and cached.
    def highlight(str)
      @code ||= @colors.collect { |color| name_to_code(color) }.join("")
      
      @code + str + RESET
    end

  end
  
end


class IO

  $-w = false

  # Gathers every record that +gets+ returns into an array, stopping at the
  # end of the stream or at the first empty record. Being built on +gets+, it
  # also works when $/ == nil, which works around a problem in Ruby 1.8.1.
  def readlines
    records = []
    loop do
      record = gets
      break unless record && record.length > 0
      records << record
    end
    records
  end

  $-w = true

end


# -------------------------------------------------------
# Logging
# -------------------------------------------------------

# Very minimal logging output. If verbose is set, this displays the method and
# line number whence called. It can be a mixin to a class, which displays the
# class and method from where it called. If not in a class, it displays only the
# method.

# All kids love log.
class Log

  VERSION = "1.0.1"

  attr_reader :quiet
  
  module Severity
    DEBUG = 0
    INFO  = 1
    WARN  = 2
    ERROR = 3
    FATAL = 4
  end

  include Log::Severity

  # Parses one entry of Kernel#caller: file (1), line number (2) and,
  # optionally, the method name (3). The method name may be quoted either as
  # `meth' or as 'meth'.
  WHENCE_RE = /(.*):(\d+)(?::in\s+[`'](.*)')?/

  def initialize
    @width   = 0
    @output  = $stdout
    @fmt     = "[%s:%04d] {%s}"
    @level   = FATAL
    @quiet   = false
  end
    
  # Sets the threshold: true means DEBUG (everything), an Integer is taken as
  # the level itself, and anything else (false, nil, ...) means FATAL.
  def verbose=(v)
    @level = case v
             when TrueClass
               DEBUG
             when Integer
               v
             else
               FATAL
             end
  end

  # True when messages of DEBUG severity are being shown.
  def verbose
    @level <= DEBUG
  end

  def level=(lvl)
    @level = lvl
  end

  # Assigns output to the given stream.
  def output=(io)
    @output = io
  end

  # Assigns output to a file with the given name. Returns the file; client
  # is responsible for closing it.
  def outfile=(f)
    @output = if f.kind_of?(IO) then f else File.new(f, "w") end
  end

  # Creates a printf format for the given widths, for aligning output.
  def set_widths(file_width, line_width, cls_width, meth_width)
    @fmt = "[%#{file_width}s:%#{line_width}d] {%#{cls_width}s\#%#{meth_width}s}"
  end

  # Breaks a caller entry into [ file, line, class, method ]. The file has its
  # directory part removed; the class is whatever the client passed in (or
  # ""); the method is "???" when the entry does not name one. An entry that
  # cannot be parsed at all yields "???" for the file and 0 for the line.
  def get_whence(c, classname)
    cls = classname || ""
    md  = c && WHENCE_RE.match(c)
    return [ "???", 0, cls, "???" ] unless md

    # "m" lets "." match newlines. (The "s" flag that used to be here selects
    # an encoding in Ruby, it is not a dot-all switch.)
    file = md[1].sub(%r{.*/}m, "")
    line = md[2]
    meth = md[3] || "???"
    [ file, line, cls, meth ]
  end

  # Logs the given message, if level is at or above the current threshold.
  # depth is the number of stack frames to skip when reporting the call site.
  # If a block is given, its value replaces msg (see print_formatted).
  def log(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    if level >= @level
      c = (caller(depth) || [])[0]
      file, line, cls, meth = get_whence(c, cname)
      print_formatted(file, line, cls, meth, msg, level, &blk)
    end
  end

  # Shows the current stack: one output line per frame, with the message on
  # the first line and a ditto mark on the following ones.
  def stack(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    if level >= @level
      stk = caller(depth) || []
      stk.shift
      stk.each do |c|
        file, line, cls, meth = get_whence(c, cname)
        print_formatted(file, line, cls, meth, msg, level, &blk)
        msg = '"'
      end
    end
  end

  # Writes one formatted line to the output. When a block is given it is
  # called to produce the message: a String result is printed in place of
  # msg, any other result suppresses the line. (Previously the block's string
  # was computed and then dropped without being printed.)
  def print_formatted(file, line, cls, meth, msg, level, &blk)
    if blk
      produced = blk.call
      return unless produced.kind_of?(String)
      msg = produced
    end

    hdr = sprintf @fmt, file, line, cls, meth
    @output.puts hdr + " " + msg.to_s.chomp
  end

  # by default, class methods delegate to a single app-wide log.

  @@log = Log.new

  def self.verbose
    @@log.verbose
  end

  # Anything other than nil, false and 0 turns verbose (DEBUG) output on.
  def self.verbose=(v)
    @@log.verbose = (v && v != 0) ? DEBUG : FATAL
  end

  def self.level=(lvl)
    @@log.level = lvl
  end

  def self.set_widths(file_width, line_width, cls_width, meth_width)
    @@log.set_widths(file_width, line_width, cls_width, meth_width)
  end

  def self.log(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    @@log.log(msg, level, depth + 1, cname, &blk)
  end

  def self.stack(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    @@log.stack(msg, level, depth, cname, &blk)
  end

  def self.warn(msg, depth = 1, cname = nil, &blk)
    write("WARNING: " + msg, depth + 1, cname, &blk)
  end

  # In verbose mode the error is reported with a stack listing; otherwise it
  # goes to standard error as a single line.
  def self.error(msg, depth = 1, cname = nil, &blk)
    if verbose
      stack(msg, Log::ERROR, depth + 1, cname, &blk)
    else
      $stderr.puts "ERROR: " + msg
    end
  end

  # Like error, at WARN severity and without a prefix; outside of verbose
  # mode the message is dropped when the program options say quiet.
  def self.write(msg, depth = 1, cname = nil, &blk)
    if verbose
      stack(msg, Log::WARN, depth + 1, cname, &blk)
    elsif GlarkOptions.instance.quiet
      # nothing
    else
      $stderr.puts msg
    end
  end

end


# Mixin that gives a class the logging methods of Log, with the name of the
# including object's class added to each message. Every method adds one to
# depth so that the reported call site is the caller of the mixin method, and
# passes any block through to Log. Note that warn, once mixed in, takes the
# place of Kernel#warn for that class.
module Loggable

  # Logs the given message, including the class whence invoked.
  # (The block used to be accepted here but not handed on.)
  def log(msg = "", level = Log::DEBUG, depth = 1, &blk)
    Log.log(msg, level, depth + 1, self.class.to_s, &blk)
  end
  
  # Logs the message along with the current stack.
  def stack(msg = "", level = Log::DEBUG, depth = 1, &blk)
    Log.stack(msg, level, depth + 1, self.class.to_s, &blk)
  end

  def warn(msg, depth = 1, &blk)
    Log.warn(msg, depth + 1, self.class.to_s, &blk)
  end

  def error(msg, depth = 1, &blk)
    Log.error(msg, depth + 1, self.class.to_s, &blk)
  end

  def write(msg, depth = 1, &blk)
    Log.write(msg, depth + 1, self.class.to_s, &blk)
  end

end


# -------------------------------------------------------
# File tester
# -------------------------------------------------------

# Classifies files as text, binary, directory, and so on. Known file name
# extensions decide immediately; anything else is judged by sampling the
# beginning of the file.
class FileTester 
  include Loggable

  BINARY     = "binary"
  DIRECTORY  = "directory"
  NONE       = "none"
  TEXT       = "text"
  UNKNOWN    = "unknown"
  UNREADABLE = "unreadable"

  # the percentage of characters that we allow to be odd in a text file
  @@ODD_FACTOR = 0.3

  # how many bytes (characters) of a file we test
  @@TEST_LENGTH = 1024

  # extension => true (always text) or false (never text)
  @@KNOWN = Hash.new

  # extensions associated with files that are always text:
  %w{ 
    c
    cpp
    css
    h
    f
    for
    fpp
    hpp
    html
    java
    mk
    php
    pl
    pm
    rb
    rbw
    txt
  }.each { |suf| @@KNOWN[suf] = true }

  # extensions associated with files that are never text:
  %w{ 
    Z
    a
    bz2
    elc
    gif
    gz
    jar
    jpeg
    jpg
    o
    obj
    pdf
    png
    ps
    tar
    zip
  }.each { |suf| @@KNOWN[suf] = false }

  # True if the byte value c has none of the bits above the low seven set.
  def self.ascii?(c)
    # from ctype.h
    (c.to_i & ~0x7f) == 0
  end

  # Returns one of the type constants for the named file: DIRECTORY, TEXT or
  # BINARY (readable regular files), UNREADABLE, UNKNOWN (any other file
  # type), or NONE when the file does not exist.
  def self.type(file)
    begin
      case File.stat(file).ftype
      when "directory"
        DIRECTORY
      when "file"
        if File.readable?(file)
          FileTester.text?(file) ? TEXT : BINARY
        else
          UNREADABLE
        end
      else
        UNKNOWN
      end
    rescue Errno::ENOENT
      NONE
    end
  end

  def self.set_text(ext)
    @@KNOWN[ext] = true
  end

  def self.set_nontext(ext)
    @@KNOWN[ext] = false
  end

  def self.text_extensions
    @@KNOWN.keys.select { |suf| @@KNOWN[suf] }
  end

  def self.nontext_extensions
    @@KNOWN.keys.reject { |suf| @@KNOWN[suf] }
  end

  # Returns whether the named file looks like text. A registered extension
  # settles it. Otherwise the first @@TEST_LENGTH bytes are read: any null
  # byte means "not text", and so does too high a share of non-ASCII bytes
  # (see summary). A file that does not exist is not text.
  def self.text?(file)
    # Don't waste our time if it doesn't even exist:
    # (File.exist? rather than File.exists?, which newer Rubies removed.)
    return false unless File.exist?(file)
    
    if file.index(/\.(\w+)\s*$/)
      suffix = $1
      return @@KNOWN[suffix] if @@KNOWN.include?(suffix)
    end
    
    # binary mode, so that the bytes tested are the bytes in the file
    buf = File.open(file, "rb") { |f| f.read(@@TEST_LENGTH) }

    # File had length of 0. This used to return the string UNKNOWN, which
    # every caller in view treated as "true"; return a real boolean with the
    # same meaning.
    return true unless buf

    ntested = 0
    nodd = 0

    buf.each_byte do |ch|
      ntested += 1

      # never allow null in a text file
      return false if ch.to_i == 0
      
      nodd += 1 unless FileTester.ascii?(ch)
    end

    FileTester.summary(nodd, ntested)
  end

  # True if the number of odd bytes is below the allowed share of the bytes
  # tested.
  def self.summary(nodd, ntested)
    nodd < ntested * @@ODD_FACTOR
  end

end


# -------------------------------------------------------
# Input file
# -------------------------------------------------------

# A thing that can be grepped.
# One input (a file or a stream) being searched. It keeps the records read
# so far, a per-line status used when writing output, and a reference to the
# output formatter.
#
# When $/ is "\n", records are read from the stream one at a time as each_line
# is called. With any other record separator the whole input is read up
# front, and get_lines / get_range translate between records ("regions") and
# real lines.
class InputFile
  include Loggable

  attr_reader :fname, :stati
  attr_accessor :count, :output, :invert_match

  # cross-platform end of line:   DOS  UNIX  MAC
  ANY_END_OF_LINE = Regexp.new('(?:\r\n|\n|\r)')

  WRITTEN = "written"
  
  # fname: name used for display (and re-read by linecount)
  # io:    the open stream to read records from
  def initialize(fname, io)
    @fname        = fname
    @io           = io
    @stati        = Array.new      # index = line number, value = context character
    @count        = nil
    @output       = nil
    @extracted    = nil
    @regions      = nil
    @modlines     = nil
    @invert_match = false
    @linecount    = nil
    @readall      = $/ != "\n"
    # Read from the stream we were handed rather than reopening fname, which
    # is not a real path when the input is standard input.
    @lines        = @readall ? @io.readlines : Array.new
  end
  
  # Number of records in the file, found by reading fname again (and then
  # cached).
  # NOTE(review): this needs fname to be a readable path, and counts records
  # as delimited by the current $/ -- confirm that is what callers expect.
  def linecount
    @linecount ||= IO.readlines(@fname).size
  end

  # Yields each record of the input in turn. In line-at-a-time mode the
  # records are also remembered, for get_lines.
  def each_line
    if @readall
      @lines.each do |line|
        yield line
      end
    else
      while (line = @io.gets) && line.length > 0
        @lines << line
        yield line
      end
    end
  end

  # Sets the status of lines from..to to ch, for lines that have no status
  # yet. With force, a status other than WRITTEN is overwritten as well.
  def set_status(from, to, ch, force = false)
    from.upto(to) do |ln|
      if (not @stati[ln]) || (@stati[ln] != WRITTEN && force)
        @stati[ln] = ch
      end
    end
  end

  # Records a match covering start_line..end_line. Either the match counter
  # is incremented (when counting), or the lines are marked ":" and the
  # surrounding context lines "-" (before) and "+" (after). In grep output
  # mode only the first line of the match is used.
  def mark_as_match(start_line, end_line = start_line)
    if GlarkOptions.instance.output == "grep"
      end_line = start_line
    end

    GlarkOptions.instance.set_exit_status(invert_match ? 1 : 0)

    if @count
      @count += 1
    else
      st = [0, start_line - GlarkOptions.instance.before].max
      set_status(st,           start_line - 1,                         "-")
      set_status(start_line,   end_line,                               ":",  true)
      set_status(end_line + 1, end_line + GlarkOptions.instance.after, "+")
    end
  end

  def write_matches(matching, from = nil, to = nil)
    @output.write_matches(matching, from, to)
  end

  def write_all
    @output.write_all
  end

  # Returns the lines for this file, separated by end of line sequences.
  def get_lines
    if $/ == "\n"
      @lines
    else
      @extracted ||= begin
                       # This is much easier. Just resplit the whole thing at end of line
                       # sequences.
                       
                       eoline    = "\n"             # should be OS-dependent
                       reallines = @lines.join("").split(ANY_END_OF_LINE)
                       
                       # "\n" after all but the last line; nothing at all
                       # for empty input
                       lastline  = reallines.pop
                       extracted = reallines.collect { |line| line + eoline }
                       extracted << lastline if lastline

                       if Log.verbose
                         extracted.each_with_index do |line, idx|
                           log "extracted[#{idx}]: #{line}"
                         end
                       end
                       extracted
                     end
    end
  end

  # Returns the given line for this file. For this method, a line ends with a
  # CR, as opposed to the "lines" method, which ends with $/.
  def get_line(lnum)
    log "lnum: #{lnum}"
    ln = get_lines()[lnum]
    log "ln: #{ln}"
    ln
  end

  # returns the range that is represented by the region number
  def get_range(rnum)
    if $/ == "\n"
      # easy case: range is the range number, unless it is out of range.
      rnum < @lines.length ? (rnum .. rnum) : nil
    else
      unless @regions
        srclines = @modlines ? @modlines : @lines

        @regions = []           # keys = region number; values = range of lines

        lstart = 0
        srclines.each do |line|
          # a region spans one line per end of line sequence it contains
          lend = lstart + line.scan(ANY_END_OF_LINE).length

          @regions << Range.new(lstart, lend - 1)

          lstart = lend
        end
      end

      @regions[rnum]
    end
  end
end


# -------------------------------------------------------
# Binary input file
# -------------------------------------------------------

# An input file searched as binary data: instead of the matching lines, only
# the fact that it matched (or the count) is reported.
class BinaryFile < InputFile

  # Reports the result for this file. from and to are accepted for
  # compatibility with InputFile#write_matches, and are not used.
  def write_matches(matching, from = nil, to = nil)
    if count
      # write_count is defined by the output formats, not by input files
      @output.write_count(matching)
    else
      puts "Binary file " + @fname + " matches"
    end
  end

end


# -------------------------------------------------------
# Output format
# -------------------------------------------------------

# Base class for writing the lines of an InputFile. It reads the per-line
# status array of the input (InputFile#stati) to decide what to print, and
# marks lines as written once they have been. Subclasses provide write_count.
class OutputFormat
  include Loggable

  attr_reader :formatted, :infile, :show_file_name, :has_context

  def initialize(infile)
    opts             = GlarkOptions.instance

    @infile          = infile
    # File names are shown if asked for explicitly; if the option is unset
    # (nil), they are shown when there is a label, more than one file, or a
    # directory as the only argument.
    @show_file_name  = (opts.show_file_names ||
                        (opts.show_file_names == nil && 
                         (opts.label ||
                          $files.size > 1 ||
                          ($files[0] != "-" && FileTester.type($files[0]) == FileTester::DIRECTORY))))

    @formatted       = []       # index = line number, value = line as it should be displayed
    @has_context     = false
    @out             = opts.out
  end

  # Prints the line, which is assumed to be 0-indexed, and is thus adjusted by
  # one.
  def print_line_number(lnum)
    @out.printf "%5d ", lnum + 1
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  #
  # lnum is a region number (see InputFile#get_range); every line in that
  # region is printed. Printing is best-effort: an error does not stop the
  # program, but it is now reported to the log rather than dropped unseen.
  def print_line(lnum, ch = nil)
    log "lnum #{lnum}, ch: '#{ch}'"
    begin
      lnums = @infile.get_range(lnum)
      log "lnums(#{lnum}): #{lnums}"
      if lnums
        lnums.each do |ln|
          if show_line_numbers
            print_line_number(ln)
            if ch && has_context
              @out.printf "%s ", ch
            end
          end
          @out.puts get_line_to_print(ln)
        end
      end
    rescue => e
      log "could not print line #{lnum}: #{e}"
    end
  end

  # Writes the lines from..to (defaulting to the whole file). When counting,
  # only the count is written. Otherwise, for matching == true, every line
  # that has a status and is not yet written is printed; for matching ==
  # false, every line that is neither written nor marked as a match is.
  def write_matches(matching, from, to)
    if @infile.count
      write_count(matching)
      return
    end

    firstline = from ? from : 0
    lastline  = to   ? to   : @infile.get_lines.length - 1

    if matching
      (firstline .. lastline).each do |ln|
        status = @infile.stati[ln]
        next if !status || status == InputFile::WRITTEN

        # a gap in the statuses means that this starts a new group of lines
        if firstline > 0 && !@infile.stati[ln - 1] && has_context && GlarkOptions.instance.show_break
          @out.puts "  ---"
        end
        
        print_line(ln, status) 
        @infile.stati[ln] = InputFile::WRITTEN
      end
    else
      (firstline .. lastline).each do |ln|
        if @infile.stati[ln] != InputFile::WRITTEN && @infile.stati[ln] != ":"
          log "printing #{ln}"
          print_line(ln)
          @infile.stati[ln] = InputFile::WRITTEN
        end
      end
    end
  end

  # Prints every line of the file.
  def write_all
    (0 ... @infile.get_lines.length).each do |ln|
      print_line(ln) 
    end
  end

  # The line as it should be displayed: the formatted version if one has been
  # stored, else the line as read.
  def get_line_to_print(lnum)
    formatted[lnum] || infile.get_line(lnum)
  end

  def show_line_numbers
    GlarkOptions.instance.show_line_numbers
  end

end


# -------------------------------------------------------
# Glark output format
# -------------------------------------------------------

# The glark output style: the file name is written once, as a header above
# the lines of that file, and line numbers and the file name can be
# highlighted.
class GlarkOutputFormat < OutputFormat

  def initialize(infile)
    super
    opts = GlarkOptions.instance
    @has_context = opts.after != 0 || opts.before != 0
    @file_header_shown = false
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  #
  # Printing is best-effort: an error does not stop the program, but it is
  # now reported to the log rather than dropped unseen.
  def print_line(lnum, ch = nil)
    log "lnum #{lnum}, ch: '#{ch}'"
    begin
      lnums = @infile.get_range(lnum)
      log "lnums(#{lnum}): #{lnums}"
      if lnums
        log "printing"
        lnums.each do |ln|
          println(ln, ch)
        end
      end
    rescue => e
      log "could not print line #{lnum}: #{e}"
    end
  end

  # Writes the file name (or the label option, if set) on a line of its own,
  # the first time this is called. After the first call nothing is written
  # any more, whether or not a header was shown.
  def show_file_header
    if show_file_name && !@file_header_shown
      opts  = GlarkOptions.instance
      fname = opts.label || @infile.fname
      if opts.highlight
        @out.puts opts.file_highlight.highlight(fname)
      else
        @out.puts fname
      end
    end
    @file_header_shown = true
  end

  # Prints the 1-indexed line number, highlighted if a line number highlight
  # is set, and padded on the left in either case.
  def print_line_number(lnum)
    if lnh = GlarkOptions.instance.line_number_highlight
      lnumstr = (lnum + 1).to_s
      pad = " " * ([5 - lnumstr.length, 0].max)
      @out.print pad + " " + lnh.highlight(lnumstr) + " "
    else
      super
    end
  end
 
  # Writes the number of matching lines, or, for matching == false, the
  # number of lines that did not match.
  def write_count(matching = true)
    ct = matching ? @infile.count : @infile.get_lines.size - @infile.count
    @out.puts "    " + ct.to_s
  end

  def write_matches(matching, from = nil, to = nil)
    show_file_header
    super(matching, from, to)
  end

  def write_all
    show_file_header
    super
  end

  # Prints one line: the line number (if enabled), the context character
  # (if given, and context is being shown), and then the line itself.
  def println(ln, ch)
    if show_line_numbers
      print_line_number(ln)
    end
    
    if ch && has_context
      @out.printf "%s ", ch
    end

    line = get_line_to_print(ln)
    log "line: #{line}"
    
    @out.puts line
  end

end

# The output format that Glark#search_file falls back to when the output
# option is none of "grep", "ansi", "xterm" or "match". It inherits
# everything from GlarkOutputFormat unchanged.
class GlarkTextOutputFormat < GlarkOutputFormat
end

# The output format that Glark#search_file uses when the output option is
# "ansi" or "xterm". It inherits everything from GlarkOutputFormat unchanged.
class GlarkANSIOutputFormat < GlarkOutputFormat
end

# An output format that prints nothing, and instead records which lines it
# was asked to write. (Glark#search_file reports output to a match list as
# not yet supported.)
class GlarkMatchList < GlarkOutputFormat
  attr_reader :matches

  def initialize(infile)
    super
    @matches = Array.new        # index = line number, value = true if written
  end

  # Marks the lines from..to as matched. As in the other output formats, from
  # and to may be omitted, and then default to the first and last line of the
  # file.
  def write_matches(matching, from = nil, to = nil)
    stack "matching: #{matching}"
    firstline = from ? from : 0
    lastline  = to   ? to   : @infile.get_lines.length - 1
    firstline.upto(lastline) do |line|
      @matches[line] = true
    end
    log "matches: #{@matches}"
  end

end


# -------------------------------------------------------
# Grep output format
# -------------------------------------------------------

# This matches grep, mostly. It is for running within emacs, thus,
# it does not support context or highlighting.
class GrepOutputFormat < OutputFormat

  # Writes the file name prefix (if file names are shown) and the number of
  # matching lines, or, for matching == false, of lines that did not match.
  def write_count(matching = true)
    print_file_name
    ct = matching ? @infile.count : @infile.get_lines.length - @infile.count
    puts ct
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  #
  # The line is preceded by the file name and the line number, where those
  # are enabled. ch is accepted for compatibility and not used.
  def print_line(lnum, ch = nil)
    ln = get_line_to_print(lnum)

    if ln
      print_file_name
      if show_line_numbers
        printf "%d: ", lnum + 1
      end
      
      print ln
    end
  end

  # Prints "name:" if file names are being shown. The name is the label
  # option when one is set, as in the other output formats. (The label used
  # to be looked up here and then ignored.)
  def print_file_name
    if show_file_name
      fname = GlarkOptions.instance.label || @infile.fname
      print fname, ":"
    end
  end

end


# -------------------------------------------------------
# Glark
# -------------------------------------------------------

# The main processor.
class Glark 
  include Loggable
  
  # func: the function object (expression) that is applied to every input
  def initialize(func)
    @func = func
    @files = Array.new          # files searched, so we don't cycle through links
  end

  # Attaches the output format selected by the output option to the input,
  # sets it up for counting and inverted matching as the options say, and
  # runs the expression over it.
  def search_file(input)
    outclass = case GlarkOptions.instance.output
               when "grep"
                 GrepOutputFormat
               when "ansi", "xterm"
                 GlarkANSIOutputFormat
               when "match"
                 error "output to match list is not yet supported"
                 GlarkMatchList
                 # exit 2
               else
                 GlarkTextOutputFormat
               end
    
    output       = outclass.new(input)
    input.output = output

    input.count        = 0    if GlarkOptions.instance.count
    input.invert_match = true if GlarkOptions.instance.invert_match
    
    @func.process(input)
  end

  # Searches the named file as binary data. The file is closed again when the
  # search is done.
  def search_binary_file(fname)
    log "searching binary file #{fname} for #{@func}"
    File.open(fname) do |f|
      f.binmode                # for MSDOS/WinWhatever
      search_file(BinaryFile.new(fname, f))
    end
  end

  # True if the base name fails the with-basename pattern, or matches the
  # without-basename pattern.
  def basename_skip?(fname)
    basename = File.basename(fname)
    options = GlarkOptions.instance
    inc = options.with_basename    && !options.with_basename.match(basename)
    exc = options.without_basename &&  options.without_basename.match(basename)
    inc || exc
  end

  # As basename_skip?, for the name as given and the fullname patterns.
  def fullname_skip?(fname)
    options = GlarkOptions.instance
    inc = options.with_fullname    && !options.with_fullname.match(fname)
    exc = options.without_fullname &&  options.without_fullname.match(fname)
    inc || exc
  end

  # True if the file is excluded by name, or is larger than the size limit.
  # Standard input ("-") has no size to test.
  def skipped?(fname)
    options = GlarkOptions.instance
    basename_skip?(fname) || fullname_skip?(fname) ||
     (options.size_limit && fname != "-" && File.size(fname) > options.size_limit)
  end

  # Searches the named file, or standard input for "-", as text. A file
  # opened here is closed again when the search is done.
  def search_text(fname)
    if skipped?(fname)
      log "skipping file: #{fname}"
    else
      log "searching text"
      log "searching #{fname} for #{@func}"
      if fname == "-"
        search_file(InputFile.new(fname, $stdin))
      else
        File.open(fname) do |f|
          search_file(InputFile.new(fname, f))
        end
      end
    end
  end

  # Handles a file detected as binary, as the binary-files option says: skip
  # it, search it as binary, or search it as if it were text.
  def search_binary(fname)
    if skipped?(fname)
      log "skipping file: #{fname}"
    else
      log "handling binary"
      
      case GlarkOptions.instance.binary_files
      when "without-match"
        log "skipping binary file #{fname}"
        
      when "binary"
        search_binary_file(fname)
        
      when "text"
        log "processing binary file #{fname} as text"
        search_text(fname)
      end
    end
  end

  # Handles a directory, as the directory option says. When recursing, the
  # inode of each entry is remembered, and entries already seen are not
  # searched again.
  def search_directory(fname)
    log "processing directory"
    case GlarkOptions.instance.directory
    when "read"
      write "#{fname}: Is a directory"
    when "recurse"
      log "recursing into directory #{fname}"
      begin
        entries = Dir.entries(fname).reject { |x| x == "." || x == ".." }
        entries.each do |e|
          entname = fname + "/" + e
          # File.exist? rather than File.exists?, which newer Rubies removed
          inode = File.exist?(entname) && File.stat(entname).ino
          if inode && @files.include?(inode)
            Log.verbose && log("file already processed: #{entname}")
          else
            @files << inode
            search(entname)
          end
        end
      rescue Errno::EACCES
        write "directory not readable: #{fname}"
      end
    when "skip"
      log "skipping directory #{fname}"
    else
      log "directory: #{GlarkOptions.instance.directory}"
    end
  end

  def search_unknown(fname)
    warn "unknown file type: #{fname}"
  end
        
  def search_none(fname)
    write "no such file: #{fname}"
  end

  def search_unreadable(fname)
    log "skipping unreadable: #{fname}"
  end

  # Searches whatever name refers to: standard input for "-", otherwise
  # according to the type that FileTester reports. With the exclude-matching
  # option, a file whose name matches the expression itself is skipped.
  def search(name)
    log "searching #{name} for #{@func}"

    if GlarkOptions.instance.exclude_matching
      expr = GlarkOptions.instance.expr
      if expr.respond_to?(:re) && expr.re.match(name)
        log "skipping file #{name} with matching name"
        return
      else
        log "not skipping file #{name}"
      end
    end
        
    if name == "-" 
      write "reading standard input..."
      search_text("-")
    else
      type = FileTester.type(name)

      case type
      when FileTester::BINARY
        search_binary(name)
      when FileTester::DIRECTORY
        search_directory(name)
      when FileTester::NONE
        search_none(name)
      when FileTester::TEXT
        search_text(name)
      when FileTester::UNKNOWN
        search_unknown(name)
      when FileTester::UNREADABLE
        search_unreadable(name)
      else
        error "type unknown: file: #{name}; type: #{type}"
        exit(-2)
      end
    end
  end
end


# -------------------------------------------------------
# Env
# -------------------------------------------------------

# Returns the home directory, for both Unix and Windows.

module Env

  # Returns $HOME if it is set. Otherwise the directory is put together from
  # %HOMEDRIVE% and %HOMEPATH% (a missing drive counts as "", a missing path
  # as "\"), and nil is returned if neither of those is set either.
  def Env.home_directory
    home = ENV["HOME"]
    return home if home

    drive = ENV["HOMEDRIVE"]
    path  = ENV["HOMEPATH"]
    return nil unless drive || path

    (drive || "") + (path || "\\")
  end

  # matches single and double quoted strings, and unquoted words. Capture 1
  # is the opening quote, capture 2 the text between the quotes, capture 3 an
  # unquoted word.
  #
  # The earlier version was built from a single-quoted string, in which the
  # "escaped character" branch collapsed to a literal dot, and used a
  # backreference inside a character class, where it is not a backreference.
  # As a result two strings quoted the same way were read as one.
  REGEXP = /
             (["'])            # start with a quote, and save it
             (                 # save the contents:
               (?:             #   either (and do not save this):
                 \\.           #     any escaped character
               |               #   or
                 (?!\1)[^\\]   #     anything that is not the opening quote, and is not a backslash
               )*              #   as many as we can get
             )                 # end of the contents
             \1                # end with the same quote we started with
           |                   # or
             (\S+)             # plain old nonwhitespace
           /x
  
  # reads the environment variable, splitting it according to its quoting.
  # Returns the array of words, with the enclosing quotes removed (backslashes
  # within a quoted string are kept as they are), or an empty array if the
  # variable is not set.
  def Env.split(varname)
    if v = ENV[varname]
      v.scan(REGEXP).collect { |x| x[1] || x[2] }
    else
      []
    end
  end

end


# -------------------------------------------------------
# Function Object
# -------------------------------------------------------

# A function object, which can be applied (processed) against an InputFile.
class FuncObj
  include Loggable
  
  attr_accessor :match_line_number, :file, :matches, :invert_match

  def initialize
    @match_line_number = nil
    @matches = Array.new
    @invert_match = GlarkOptions.instance.invert_match
  end

  # Remembers lnum as a line on which this object matched.
  def add_match(lnum)
    @matches.push(lnum)
  end

  # First line of the most recent match.
  def start_position
    match_line_number
  end

  # Last line of the most recent match; here, the same as the first.
  def end_position
    start_position
  end

  # Forgets the matches of the previous file, and records the name of the
  # new one.
  def reset_file(file)
    @match_line_number = nil
    @file              = file
    @matches           = Array.new
  end

  # Converts a range limit to a number. A limit containing a percentage
  # ("25%") is taken relative to the line count of infile; anything else is
  # converted with to_f. No limit (nil) stays nil.
  def range(var, infile)
    return nil unless var

    if var.index(/([\.\d]+)%/)
      total = infile.linecount
      total * $1.to_f / 100
    else
      var.to_f
    end
  end

  # Applies this object to every record of infile, by way of evaluate, which
  # subclasses define. Records outside the range options are not evaluated.
  # What gets written is then decided by the options: matches as they are
  # found, only the file name, the matches (or non-matches) at the end, or
  # the entire file when not filtering.
  def process(infile)
    reset_file(infile.fname)

    @opts = GlarkOptions.instance
    
    first_allowed = range(@opts.range_start, infile)
    last_allowed  = range(@opts.range_end,   infile)

    write_as_found = !@opts.file_names_only && @opts.filter && !@opts.count

    found        = false
    next_to_show = 0
    hits         = 0
    lnum         = 0

    infile.each_line do |line|
      within = (!first_allowed || lnum >= first_allowed) &&
               (!last_allowed  || lnum <= last_allowed)

      if within && evaluate(line, lnum, infile)
        mark_as_match(infile)
        found = true
        hits += 1
        
        if write_as_found
          infile.write_matches(!invert_match, next_to_show, lnum)
          next_to_show = lnum + 1
        elsif @opts.file_names_only
          # we don't need to match more than once
          break
        end
        
        # stop once we have found as many matches as were asked for
        break if @opts.match_limit && hits >= @opts.match_limit
      end

      lnum += 1
    end
    
    if @opts.file_names_only
      if found != invert_match
        print infile.fname
        if @opts.write_null
          print "\0"
        else
          puts
        end
      end
    elsif !@opts.filter
      infile.write_all
    elsif invert_match
      infile.write_matches(false, 0, lnum)
    elsif found
      infile.write_matches(true, 0, lnum)
    end
  end

  # Marks the line of the current match in the input file.
  def mark_as_match(infile)
    infile.mark_as_match(start_position)
  end

  def to_s
    inspect
  end
  
end


# -------------------------------------------------------
# Regular expression function object
# -------------------------------------------------------

# Applies a regular expression against an InputFile.
class RegexpFuncObj < FuncObj

  attr_reader :re

  # re:    the regular expression to apply
  # hlidx: position of this expression among the expressions; with "multi"
  #        highlighting it selects the highlight, wrapping around when there
  #        are more expressions than highlights
  def initialize(re, hlidx)
    @re      = re
    @file    = nil
    @opts    = GlarkOptions.instance
    @hlidx   = if @opts.highlight && 
                   @opts.text_highlights.length > 0 &&
                   @opts.highlight == "multi"
                 hlidx % @opts.text_highlights.length
               else
                 0
               end 
    super()
  end

  def inspect
    @re.inspect
  end

  def match?(line)
    @re.match(line)
  end

  # Tests the line against the expression. On a match, the line number is
  # recorded, the line is cut down to the text of the last capture if the
  # extract-matches option is set, and, with highlighting on, the highlighted
  # form of each affected line is stored in the formatted lines of the
  # file's output. Returns true on a match, false otherwise.
  def evaluate(line, lnum, file)
    if Log.verbose
      log "evaluating <<<#{line[0 .. -2]}>>>"
    end
    
    md = match?(line)
    return false unless md

    log "matched"
    if @opts.extract_matches
      if md.kind_of?(MatchData)
        line.replace(md[-1] + "\n")
      else
        warn "--not does not work with -v"
      end
    end
    
    @match_line_number = lnum

    # highlight what the regular expression matched
    if @opts.highlight
      log "highlighting the matched expression"

      lnums = file.get_range(lnum)
      log "lnums(#{lnum}): #{lnums}"
      if lnums
        lnums.each do |ln|
          highlight_line(file, ln)
        end
      end
    end

    add_match(lnum)
    true
  end
  
  def explain(level = 0)
    " " * level + to_s + "\n"
  end

  private

  # Stores the highlighted form of line ln in the output of file. Each piece
  # of text that the expression matches is highlighted according to the first
  # capture group that took part in the match, or according to this object's
  # own index if there is none.
  def highlight_line(file, ln)
    str = file.output.formatted[ln] || file.get_line(ln)
    if Log.verbose
      log "file.output.formatted[#{ln}]: #{file.output.formatted[ln]}"
      log "file.get_line(#{ln}): #{file.get_line(ln)}"
      log "highlighting: #{str}"
    end

    highlights = @opts.text_highlights

    file.output.formatted[ln] = str.gsub(@re) do |m|
      lastcapts = Regexp.last_match.captures
      miidx = (0 ... lastcapts.length).find { |mi| lastcapts[mi] } || @hlidx

      if highlights.empty?
        # nothing to highlight with: leave the text as it is
        m
      else
        # An expression can have more capture groups than there are
        # highlights; wrap around, as is done for @hlidx, instead of running
        # off the end of the array.
        highlights[miidx % highlights.length].highlight(m)
      end
    end
  end
  
end


# -------------------------------------------------------
# Regular expression extension
# -------------------------------------------------------

# A Regexp whose match result is inverted: match returns true when the
# pattern does NOT match, and false when it does. No MatchData is returned.
class NegatedRegexp < Regexp

  # Negates Regexp#match. Any extra arguments (such as the optional start
  # position of Regexp#match) are forwarded untouched, so a NegatedRegexp can
  # be called the same way as a plain Regexp.
  def match(str, *rest)
    !super
  end

end

class Regexp

  # Handles negation, whole words, and ignore case (Ruby no longer supports
  # Regexp.new(/foo/i), as of 1.8).
  #
  # pattern::    the pattern text, optionally written as /.../ or /.../i, and
  #              optionally prefixed with "!" (before the slash) to negate it.
  #              The caller's string is never modified.
  # negated::    when true, a NegatedRegexp is built instead of a Regexp
  # ignorecase:: when true, the expression is case insensitive
  # wholewords:: when true, the pattern is wrapped in \b ... \b
  # wholelines:: when true (and wholewords is not), it is wrapped in ^ ... $
  #
  # Returns the new Regexp (or NegatedRegexp).
  def self.create(pattern, negated = false, ignorecase = false, wholewords = false, wholelines = false)
    # we handle a ridiculous number of possibilities here:
    #     /foobar/     -- "foobar"
    #     /foo/bar/    -- "foo", then slash, then "bar"
    #     /foo\/bar/   -- same as above
    #     /foo/bar/i   -- same as above, case insensitive
    #     /foo/bari    -- "/foo/bari" exactly
    #     /foo/bar\/i  -- "/foo/bar/i" exactly
    #     foo/bar/     -- "foo/bar/" exactly

    # work on a private copy: sub! below would otherwise strip the "!" from
    # the caller's string, and would raise on a frozen string
    pattern = pattern.dup

    if pattern.sub!(/^!(?=\/)/, "")
      negated = true
    end

    if pattern.index(/^\/(.*[^\\])\/i$/)
      pattern    = $1
      ignorecase = true
    elsif pattern.index(/^\/(.*[^\\])\/$/)
      pattern    = $1
    elsif pattern.index(/^(\/.*)$/)
      pattern    = $1
    elsif pattern.index(/^(.*\/)$/)
      pattern    = $1
    end
    
    if wholewords
      # sanity check: warn when the pattern does not appear to begin and end
      # with a word character, since \b would then not behave as expected

      # match "\w", A-Za-z0-9_, 
      stword = pattern.index(/^[\[\(]*(?:\\w|\w)/)

      re = Regexp.new('(?:                 # one of the following:
                           \\w             #   - \w for regexp
                         |                 # 
                           \w              #   - a literal A-Z, a-z, 0-9, or _
                         |                 # 
                           (?:             #   - one of the following:
                               \[[^\]]*    #         LB, with no RB until:
                               (?:         #      - either of:
                                   \\w     #         - "\w"
                                 |         # 
                                   \w      #         - a literal A-Z, a-z, 0-9, or _
                               )           #      
                               [^\]]*\]    #      - anything (except RB) to the next RB
                           )               #
                       )                   #
                       (?:                 # optionally, one of the following:
                           \*              #   - "*"
                         |                 # 
                           \+              #   - "+"
                         |                 #
                           \?              #   - "?"
                         |                 #
                           \{\d*,\d*\}     #   - "{3,4}", "{,4}, "{,123}" (also matches the invalid {,})
                       )?                  #
                       $                   # fin
                      ', 
                      Regexp::EXTENDED)
      endword = pattern.index(re)
      errs    = [ stword ? nil : "start", endword ? nil : "end" ].compact

      if errs.length > 0
        Log.warn "pattern '#{pattern}' does not " + errs.join(" and ") + " on a word boundary"
      end
      pattern = '\b' + pattern + '\b'
    elsif wholelines
      pattern = '^'  + pattern + '$'        # ' for emacs
    end
    
    # log "pattern", pattern
    # log "ignorecase", ignorecase
    
    reclass = negated ? NegatedRegexp : Regexp
    if ignorecase
      regex = reclass.new(pattern, Regexp::IGNORECASE)
    else
      regex = reclass.new(pattern)
    end

    regex
  end
end


# -------------------------------------------------------
# Compound expression function object
# -------------------------------------------------------

# Associates a pair of expressions.
class CompoundExpression < FuncObj

  # the operand expressions, in the order given to the constructor
  attr_reader :ops

  # Stores the operand expressions and clears the current file.
  def initialize(*ops)
    @file = nil
    @ops  = ops
    super()
  end

  # Passes the file to every operand, then to the superclass.
  def reset_file(file)
    @ops.each { |operand| operand.reset_file(file) }
    super(file)
  end

  # Returns @last_start, which this class does not assign itself.
  def start_position
    @last_start
  end
  
end


# -------------------------------------------------------
# Multi-Or expression function object
# -------------------------------------------------------

# Evaluates every operand expression; subclasses decide how many must match.
class MultiOrExpression < CompoundExpression

  # Evaluates every operand against the line (none is skipped) and asks
  # is_match? whether the set of operands that matched satisfies this
  # expression. On success the start/end positions of the last matching
  # operand are recorded and the line is added as a match.
  #
  # Returns true on a match, false otherwise.
  def evaluate(line, lnum, file)
    # log self, "evaluating <<<#{line[0 .. -2]}>>>"

    hits = @ops.select { |operand| operand.evaluate(line, lnum, file) }
    return false unless is_match?(hits)

    final              = hits[-1]
    @last_start        = final.start_position
    @last_end          = final.end_position
    @match_line_number = lnum

    add_match(lnum)
    true
  end

  # Returns the operands joined by the operator name, in parentheses.
  def inspect
    separator = " " + operator + " "
    "(" + @ops.map { |operand| operand.to_s }.join(separator) + ")"
  end

  # Returns the end position recorded by the last successful evaluate.
  def end_position
    @last_end
  end

  # Returns a multi-line description: the criteria as a heading, then each
  # operand's explanation indented by four more spaces, separated by the
  # operator name.
  def explain(level = 0)
    indent  = " " * level
    heading = indent + criteria + ":\n"
    details = @ops.map { |operand| operand.explain(level + 4) }
    heading + details.join(indent + operator + "\n")
  end
  
end


# -------------------------------------------------------
# Inclusive or expression function object
# -------------------------------------------------------

# Evaluates the expressions, and is satisfied when at least one returns true.
class InclusiveOrExpression < MultiOrExpression

  # Satisfied when at least one operand matched.
  def is_match?(matched_ops)
    matched_ops.size > 0
  end

  # The word placed between operands in inspect and explain.
  def operator
    "or"
  end

  # The heading used by explain: "either" for exactly two operands,
  # "any of" otherwise.
  def criteria
    if ops.size == 2
      "either"
    else
      "any of"
    end
  end
  
end


# -------------------------------------------------------
# Exclusive or expression function object
# -------------------------------------------------------

# Evaluates the expressions, and is satisfied when only one returns true.
class ExclusiveOrExpression < MultiOrExpression

  # Satisfied when exactly one operand matched.
  def is_match?(matched_ops)
    1 == matched_ops.size
  end

  # The word placed between operands in inspect and explain.
  def operator
    "xor"
  end

  # The heading used by explain.
  def criteria
    "only one of"
  end

end


# -------------------------------------------------------
# And expression function object
# -------------------------------------------------------

# Evaluates both expressions, and is satisfied when both return true.
class AndExpression < CompoundExpression
  
  # dist::     the maximum number of lines allowed between the two matches;
  #            0 means the same line, Float infinity means anywhere in the file
  # op1, op2:: the two operand expressions
  def initialize(dist, op1, op2)
    @dist = dist
    super(op1, op2)
  end

  # Marks the span from start_position to end_position as a match in infile.
  def mark_as_match(infile)
    infile.mark_as_match(start_position, end_position)
  end

  # Returns whether the most recent match of +op+ lies no more than @dist
  # lines from +lnum+.
  def match_within_distance(op, lnum)
    stack "op: #{op}; lnum: #{lnum}"
    # the distance is lnum minus the earlier match; the reverse subtraction is
    # never positive, which made this check pass for any recorded match
    op.matches.size > 0 && (lnum - op.matches[-1] <= @dist)
  end

  # Returns "(op1 <relation> op2)", the relation describing the distance.
  def inspect
    str = "("+ @ops[0].to_s
    if @dist == 0
      str += " same line as "
    elsif infinite_distance?
      str += " same file as "
    else 
      str += " within " + @dist.to_s + " lines of "
    end
    str += @ops[1].to_s + ")"
    str
  end

  # Evaluates every operand against the line. For each operand that matched,
  # the other operand's recorded matches are searched for one within @dist
  # lines of +lnum+; the first such match becomes @last_start.
  #
  # Returns true when such a pair was found, false otherwise.
  def match?(line, lnum, file)
    matches = (0 ... @ops.length).select do |oi|
      @ops[oi].evaluate(line, lnum, file)
    end

    matches.each do |mi|
      oidx  = (1 + mi) % @ops.length
      other = @ops[oidx]
      if match_within_distance(other, lnum)
        # take the first recorded match that is within the distance limit
        other.matches.each do |m|
          if lnum - m <= @dist
            log "match: #{m} within range #{@dist} of #{lnum}"
            @last_start = m
            return true
          end
        end
        log "other matches out of range"
        return false
      end
    end

    return false
  end
  
  # Returns the largest end position among the operands.
  def end_position
    @ops.collect { |op| op.end_position }.max
  end

  # Returns true (and records the line number) when match? succeeds for the
  # line, false otherwise.
  def evaluate(line, lnum, file)
    # log self, "evaluating line #{lnum}: #{line[0 .. -2]}"

    if match?(line, lnum, file)
      @match_line_number = lnum
      true
    else
      false
    end
  end

  # Returns a multi-line description: a heading for the distance, then the
  # explanation of each operand (indented by four more spaces), separated by
  # "and".
  def explain(level = 0)
    str = ""
    if @dist == 0
      str += " " * level + "on the same line:\n"
    elsif infinite_distance?
      str += " " * level + "in the same file:\n"
    else 
      lnstr = @dist == 1 ? "line" : "lines"
      str += " " * level + "within #{@dist} #{lnstr} of each other:\n"
    end
    str += @ops[0].explain(level + 4)
    str += " " * level + "and\n"
    str += @ops[1].explain(level + 4)
    str
  end

  private

  # Whether @dist is the Float infinity that stands for "anywhere in the file".
  def infinite_distance?
    @dist.kind_of?(Float) && @dist.infinite?
  end
  
end


# -------------------------------------------------------
# Expression function object creator
# -------------------------------------------------------

# Builds expression objects (regular expressions and their and/or/xor
# combinations) from command-line style arguments or from a file of patterns.
class ExpressionFactory
  include Loggable

  # signifies no limit to the distance between matches, i.e., anywhere within
  # the entire file is valid.
  INFINITE_DISTANCE = -1

  attr_reader :expr

  def initialize
    # number of regular expressions created so far; passed to each new
    # RegexpFuncObj as its index
    @regexps = 0
  end

  # Reads a file containing one regular expression per line. Blank lines are
  # skipped; surrounding whitespace is removed from each pattern. Multiple
  # patterns are combined with InclusiveOrExpression.
  #
  # Returns the resulting expression, or nil when the file has no patterns.
  def read_file(fname)
    log "reading file: #{fname}"
    expr = nil
    File.open(fname) do |file|
      file.each_line do |line|
        log "line: #{line}"
        line.strip!
        unless line.empty?
          re = make_regular_expression(line.chomp)
          if expr
            expr = InclusiveOrExpression.new(expr, re)
          else
            expr = re
          end
        end
      end
    end
    log "returning expression #{expr}"
    expr
  end

  # Creates a RegexpFuncObj for the pattern, honoring the nocase, whole_words
  # and whole_lines settings of GlarkOptions. The pattern string itself is
  # not modified (a copy is given to Regexp.create).
  def make_regular_expression(pattern, negated = false)
    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    # (File.exist? rather than File.exists?, which Ruby 3.2 removed)
    if File.exist?(pattern)
      warn "pattern '#{pattern}' exists as a file.\n    Pattern may have been omitted."
    end

    regex = Regexp.create(pattern.dup,
                          negated,
                          GlarkOptions.instance.nocase,
                          GlarkOptions.instance.whole_words,
                          GlarkOptions.instance.whole_lines)
    re = RegexpFuncObj.new(regex, @regexps)
    @regexps += 1
    re
  end

  # Consumes arguments from +args+ to create two expressions, and returns
  # them as a pair. Either may be nil when the arguments run out.
  def make_expressions(args)
    arg = args.shift
    a1 = make_expression(arg, args)

    arg = args.shift
    a2 = make_expression(arg, args)
    
    [ a1, a2 ]
  end

  # removes optional end tag
  def shift_end_tag(name, args)
    # explicit end tag is optional:
    args.shift if args[0] == ("--end-of-" + name)
  end
  
  # Consumes one pattern from +args+ and returns it as a negated regular
  # expression. Exits with status 2 when no pattern is left.
  def make_not_expression(args)
    # take the pattern off the argument list; handing the whole array to
    # make_regular_expression would fail, since it expects a single string
    pattern = args.shift
    unless pattern
      error "'not' expression takes one argument"
      exit 2
    end
    expr = make_regular_expression(pattern, true)

    # explicit end tag is optional:
    shift_end_tag("not", args)
    expr
  end

  # Creates the two operands of a binary expression of the given type ("or",
  # "xor", "and"), exiting with status 2 when either is missing, and consumes
  # the optional end tag.
  def make_two_expressions(args, type)
    a1, a2 = make_expressions(args)
    unless a1 && a2
      error "'" + type + "' expression takes two arguments"
      exit 2
    end

    shift_end_tag(type, args)
    [ a1, a2 ]
  end

  def make_or_expression(args)
    a1, a2 = make_two_expressions(args, "or")
    InclusiveOrExpression.new(a1, a2)
  end

  def make_xor_expression(args)
    a1, a2 = make_two_expressions(args, "xor")
    ExclusiveOrExpression.new(a1, a2)
  end

  # Returns a true value when +x+ is a string of digits, or converts (via
  # to_i) to INFINITE_DISTANCE; nil/false otherwise.
  def numeric?(x)
    # check to ensure that this is numeric
    x && (x.to_i == INFINITE_DISTANCE || x.index(/^\d+$/))
  end  

  # Determines the distance of an "and" expression from its option form:
  #   -a NUM        the distance is the next argument
  #   --and [NUM]   the next argument if it is numeric, else 0
  #   --and=NUM     the number after the equals sign
  # Exits with status 2 on an invalid distance.
  #
  # Returns an Integer, or Float infinity for INFINITE_DISTANCE.
  def make_and_distance(arg, args)
    if arg == "-a"
      dist = args.shift
    elsif arg == "--and"
      if args.size > 0 && numeric?(args[0])
        dist = args.shift
      else
        dist = "0"
      end
    elsif arg.index(/^--and=(\-?\d+)?$/)
      dist = $1
    end
    
    # check to ensure that this is numeric
    if !numeric?(dist)
      error "invalid distance for 'and' expression: '#{dist}'\n" +
        "    expecting an integer, or #{INFINITE_DISTANCE} for 'infinite'" 
      exit 2
    end
    
    if dist.to_i == INFINITE_DISTANCE
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    dist
  end
  
  def make_and_expression(arg, args)
    dist = make_and_distance(arg, args)

    a1, a2 = make_two_expressions(args, "and")
    AndExpression.new(dist, a1, a2)
  end

  # Builds an expression from infix notation, e.g. ( foo --or bar ).
  # Arguments are consumed from +args+ until a closing parenthesis or a plain
  # pattern ends the (sub)expression.
  def make_infix_expression(arg, args = [])
    expr = nil

    while arg
      case arg
      when '('
        arg  = args.shift
        expr = make_infix_expression(arg, args)
      when '--or', '-o'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = InclusiveOrExpression.new(expr, rhs)
      when '--xor'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = ExclusiveOrExpression.new(expr, rhs)
      when Regexp.new('^--and'), '-a'
        dist = make_and_distance(arg, args)
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = AndExpression.new(dist, expr, rhs)
      when ')'
        break
      else
        # blather "assuming the last argument #{arg} is a pattern"
        expr = make_regular_expression(arg)
        break
      end
      arg = args.shift
    end

    if !expr
      error "No expression provided."
    end

    expr
  end

  # Builds an expression from prefix notation. +arg+ is the current argument
  # and +args+ the remaining ones, which are consumed as needed. When
  # +warn_option+ is set, an argument that looks like an option is reported
  # and the program exits with status 2.
  #
  # Returns the expression, or nil when +arg+ is nil.
  def make_expression(arg, args = [], warn_option = false)
    if arg
      log "processing arg #{arg}"
      case arg
      when "--or", "-o"
        make_or_expression(args)
      when "--xor"
        make_xor_expression(args)
      when %r{^\-\-and}, %r{^\-a}
        make_and_expression(arg, args)
      when '('
        make_infix_expression(arg, args)
      else
        if warn_option && arg.index(/^\-{1,2}\w/)
          warn "option not understood: #{arg}"
          exit 2
        end

        # blather "assuming the last argument #{arg} is a pattern"
        make_regular_expression(arg)
      end
    else
      nil
    end
  end

end


# -------------------------------------------------------
# Help
# -------------------------------------------------------

# Writes the usage summary to standard output when instantiated.
class GlarkHelp

  def initialize
    intro = [
      "Usage: glark [options] expression file...",
      "Search for expression in each file or standard input.",
      "Example: glark --and=3 'try' 'catch' *.java",
      "",
    ]

    input = [
      "Input:",
      "  -0[nnn]                        Use \\nnn as the input record separator",
      "  -d, --directories=ACTION       Process directories as read, skip, or recurse",
      "      --binary-files=TYPE        Treat binary files as TYPE",
      "      --[with-]basename, ",
      "      --[with-]name EXPR         Search only files with base names matching EXPR",
      "      --without-basename, ",
      "      --without-name EXPR        Ignore files with base names matching EXPR",
      "      --[with-]fullname, ",
      "      --[with-]path EXPR         Search only files with full names matching EXPR",
      "      --without-fullname, ",
      "      --without-path EXPR        Ignore files with full names matching EXPR",
      "  -M, --exclude-matching         Ignore files with names matching the expression",
      "  -r, --recurse                  Recurse through directories",
      "      --size-limit=SIZE          Search only files no larger than SIZE",
      "",
    ]

    matching = [
      "Matching:",
      "  -a, --and=NUM EXPR1 EXPR2      Match both expressions, within NUM lines",
      "  -b, --before NUM[%]            Restrict the search to the top % or lines",
      "      --after NUM[%]             Restrict the search to after the given location",
      "  -f, --file=FILE                Use the lines in the given file as expressions",
      "  -i, --ignore-case              Ignore case for matching regular expressions",
      "  -m, --match-limit=NUM          Find only the first NUM matches in each file",
      "  -o, --or EXPR1 EXPR2           Match either of the two expressions",
      "  -R, --range NUM[%] NUM[%]      Restrict the search to the given range of lines",
      "  -v, --invert-match             Show lines not matching the expression",
      "  -w, --word, --word-regexp      Put word boundaries around each pattern",
      "  -x, --line-regexp              Select entire line matching pattern",
      "      --xor EXPR1 EXPR2          Match either expression, but not both",
      "",
    ]

    output = [
      "Output:",
      "  -A, --after-context=NUM        Print NUM lines of trailing context",
      "  -B, --before-context=NUM       Print NUM lines of leading context",
      "  -C, -NUM, --context[=NUM]      Output NUM lines of context",
      "  -c, --count                    Display only the match count per file",
      "  -F, --file-color COLOR         Specify the highlight color for file names",
      "      --no-filter                Display the entire file",
      "  -g, --grep                     Produce output like the grep default",
      "  -h, --no-filename              Do not display the names of matching files",
      "  -H, --with-filename            Display the names of matching files",
      "  -l, --files-with-matches       Print only names of matching file",
      "  -L, --files-without-match      Print only names of file not matching",
      "      --label=NAME               Use NAME as output file name",
      "  -n, --line-number              Display line numbers",
      "  -N, --no-line-number           Do not display line numbers",
      "      --line-number-color COLOR  Specify the highlight color for line numbers",
      # "      --output=FORMAT            Produce output in the format (ansi, grep)",
      "  -T, --text-color COLOR         Specify the highlight color for text",
      "      --text-color-NUM COLOR     Specify the highlight color for regexp capture NUM",
      "  -u, --highlight[=FORMAT]       Enable highlighting. Format is single or multi",
      "  -U, --no-highlight             Disable highlighting",
      "  -y, --extract-matches          Display only the matching region, not the entire line",
      "  -Z, --null                     In -l mode, write file names followed by NULL",
      "",
    ]

    debugging = [
      "Debugging/Errors:",
      "      --conf                     Write the current options in RC file format",
      "      --dump                     Write all options and expressions",
      "      --explain                  Write the expression in a more legible format",
      "  -q, --quiet                    Suppress warnings",
      "  -Q, --no-quiet                 Enable warnings",
      "  -s, --no-messages              Suppress warnings",
      "  -V, --version                  Display version information",
      "      --verbose                  Display normally suppressed output",
    ]

    footer = [
      "",
      "See the man page for more information.",
    ]

    # one output line per entry, in the order listed above
    [ intro, input, matching, output, debugging, footer ].each do |section|
      section.each { |text| puts text }
    end
  end

end


# -------------------------------------------------------
# Options
# -------------------------------------------------------

class GlarkOptions
  include Loggable, Singleton

  attr_accessor :after
  attr_accessor :before
  attr_accessor :binary_files
  attr_accessor :count
  attr_accessor :directory
  attr_accessor :exclude_matching
  attr_accessor :exit_status
  attr_accessor :explain
  attr_accessor :expr
  attr_accessor :extract_matches
  attr_accessor :file_highlight
  attr_accessor :file_names_only
  attr_accessor :filter
  attr_reader   :highlight
  attr_accessor :highlighter
  attr_accessor :invert_match
  attr_accessor :label
  attr_accessor :line_number_highlight
  attr_accessor :local_config_files
  attr_accessor :match_limit
  attr_accessor :nocase
  attr_accessor :output
  attr_accessor :quiet
  attr_accessor :range_end
  attr_accessor :range_start
  attr_accessor :show_break
  attr_accessor :show_file_names
  attr_accessor :show_line_numbers
  attr_accessor :size_limit
  attr_accessor :split_as_path
  attr_accessor :text_highlights
  attr_accessor :verbose
  attr_accessor :version
  attr_accessor :whole_lines
  attr_accessor :whole_words
  attr_accessor :with_basename
  attr_accessor :without_basename
  attr_accessor :with_fullname
  attr_accessor :without_fullname
  attr_accessor :write_null
  attr_accessor :out
  attr_accessor :err

  # Sets every option to its default value by calling reset.
  def initialize
    reset
  end

  # Restores every option to its default value, then selects the "ansi"
  # output style and rebuilds the highlight colors. The assignments come
  # first because set_output_style and reset_colors read the values set here.
  def reset
    @after             = 0          # lines of context after the match
    @before            = 0          # lines of context before the match
    @binary_files      = "binary"   # how binary files are treated (see --binary-files)
    @count             = false      # just count the lines
    @directory         = "read"     # read, skip, or recurse, a la grep
    @expr              = nil        # the expression to be evaluated
    @exclude_matching  = false      # exclude files whose names match the expression
    @exit_status       = 1          # 0 == matches, 1 == no matches, 2 == error
    @explain           = false      # display a legible version of the expression
    @extract_matches   = false      # whether to show _only_ the part that matched
    @file_names_only   = false      # display only the file names
    @filter            = true       # display only matches
    @invert_match      = false      # display non-matching lines
    @nocase            = false      # match case
    @match_limit       = nil        # the maximum number of matches to display per file
    @local_config_files = false     # whether run looks for .glarkrc in the current directory and its parents
    @quiet             = false      # minimize warnings
    @range_end         = nil        # range to stop searching; nil => the entire file
    @range_start       = nil        # range to begin searching; nil => the entire file
    @show_line_numbers = true       # display numbers of matching lines
    @show_file_names   = nil        # show the names of matching files; nil == > 1; true == >= 1; false means never
    @verbose           = nil        # display debugging output
    @whole_lines       = false      # true means patterns must match the entire line
    @whole_words       = false      # true means all patterns are '\b'ed front and back
    @write_null        = false      # in @file_names_only mode, write '\0' instead of '\n'
    @with_basename     = nil        # match files with this basename
    @without_basename  = nil        # match files without this basename
    @with_fullname     = nil        # match files with this fullname
    @without_fullname  = nil        # match files without this fullname
    @show_break        = false      # whether to show the break between sections
    @split_as_path     = true       # whether to split arguments that include the path separator

    @highlight         = "multi"    # highlight matches (using ANSI codes)

    # the highlighters; these are rebuilt by reset_colors below
    @text_highlights       = []
    @file_highlight        = nil
    @line_number_highlight = nil

    @label = nil                    # see --label=NAME ("Use NAME as output file name")

    @size_limit = nil               # see --size-limit; nil means none was given
    @out = $stdout
    @err = $stderr

    # chooses the highlighter class for the style, and calls reset_colors
    set_output_style("ansi")

    reset_colors
  end

  # Stores the exit status (0 == matches, 1 == no matches, 2 == error) and
  # returns it.
  def set_exit_status(status)
    @exit_status = status
  end

  # Returns one highlight, made by @highlighter, for each of the default
  # colors of Text::Highlighter.
  def multi_colors 
    palette = Text::Highlighter::DEFAULT_COLORS
    palette.map { |name| @highlighter.make(name) }
  end

  # Returns a one-element array holding the highlight, made by @highlighter,
  # for the first default color of Text::Highlighter.
  def single_color
    leading = Text::Highlighter::DEFAULT_COLORS.first(1)
    leading.map { |name| @highlighter.make(name) }
  end

  # Rebuilds the text, file-name and line-number highlights from @highlight
  # and @highlighter. Without a highlight setting or a highlighter, all of
  # them are cleared. An unrecognized highlight format produces a warning
  # and is treated as "single". Returns nil.
  def reset_colors
    unless @highlight && @highlighter
      @text_highlights       = []
      @file_highlight        = nil
      @line_number_highlight = nil
      return
    end

    @text_highlights = if [ "multi", "on", "true", "yes", true ].include?(@highlight)
                         multi_colors
                       elsif "single" == @highlight
                         single_color
                       elsif [ "none", "off", "false", "no", false ].include?(@highlight)
                         []
                       else
                         warn "highlight format '" + @highlight.to_s + "' not recognized"
                         single_color
                       end

    @file_highlight        = @highlighter.make("reverse bold")
    @line_number_highlight = nil # @highlighter.make("none")
  end

  # Sets the highlight format and rebuilds the highlight colors to match it
  # (see reset_colors for the recognized values).
  def highlight=(type)
    @highlight = type
    reset_colors
  end

  # Selects the output style and the highlighter that goes with it, then
  # rebuilds the highlight colors:
  #   "ansi", "xterm"  -- Text::ANSIHighlighter
  #   "grep"           -- no highlighter; highlighting, line numbers and
  #                       context are switched off
  #   "text", "match"  -- no highlighter; highlighting is switched off
  # Any other value leaves the options alone and selects no highlighter.
  def set_output_style(output)
    @output = output

    samples = @text_highlights.collect { |hl| hl.highlight("text") }
    log sprintf("%s: %s\n", "text_highlights", samples.join(", "))

    chosen = nil
    if [ "ansi", "xterm" ].include?(@output)
      chosen = Text::ANSIHighlighter
    elsif "grep" == @output
      @highlight         = false
      @show_line_numbers = false
      @after             = 0
      @before            = 0
    elsif [ "text", "match" ].include?(@output)
      @highlight         = nil
    end
    @highlighter = chosen
    
    reset_colors
  end

  # Reads the options from every source, in this order: the .glarkrc in the
  # home directory, (if local-config-files is enabled) the nearest .glarkrc
  # at or above the current directory, the GLARKOPTS environment variable,
  # and finally the command-line arguments. Then validates the result.
  #
  # args:: the command-line arguments; consumed by read_options
  def run(args)
    @args = args

    if hd = Env.home_directory
      homerc = hd + "/.glarkrc"
      read_rcfile(homerc)
    end

    if @local_config_files
      # walk from the current directory towards the root, stopping at the
      # first .glarkrc; "/" and the home directory themselves are not checked
      dir = File.expand_path(".")
      while dir != "/" && dir != hd
        rcfile = dir + "/.glarkrc"
        # File.exist? rather than File.exists?, which Ruby 3.2 removed
        if File.exist?(rcfile)
          read_rcfile(rcfile)
          break
        end

        parent = File.dirname(dir)
        # a root other than "/" (e.g. "C:/") is its own parent; stop there
        # rather than looping forever
        break if parent == dir
        dir = parent
      end
    end

    read_environment_variable

    # honor thy EMACS; go to grep mode
    if ENV["EMACS"]
      set_output_style("grep") 
    end

    read_options

    validate
  end

  # Reads options from a run-control file, if it exists. Each line has the
  # form "name: value" or "name = value"; text from a "#" to the end of the
  # line is ignored, as are lines without a name or a value, and unknown
  # names. The value is everything after the first separator, so it may
  # itself contain "=" or ":" (e.g. "expression: --and=3 foo bar").
  def read_rcfile(fname)
    # File.exist? rather than File.exists?, which Ruby 3.2 removed
    if File.exist?(fname)
      IO.readlines(fname).each do |line|
        line.sub!(/\s*#.*/, "")
        line.chomp!
        # limit of 2: split only at the first separator
        name, value = line.split(/\s*[=:]\s*/, 2)
        next unless name && value && !value.empty?

        case name
        when "after-context"
          @after = value.to_i
        when "before-context"
          @before = value.to_i
        when "binary-files"
          @binary_files = value
        when "context"
          @after = @before = value == "all" ? -1 : value.to_i
        when "expression"
          # this should be more intelligent than just splitting on whitespace:
          # make_expression takes the first word and the remaining words
          # separately, not one array
          words = value.split
          @expr = ExpressionFactory.new.make_expression(words.shift, words)
        when "file-color"
          @file_highlight = make_highlight(name, value)
        when "filter"
          @filter = to_boolean(value)
        when "grep"
          set_output_style("grep") if to_boolean(value)
        when "highlight"
          # NOTE(review): unlike highlight=, this does not call reset_colors;
          # confirm whether that is intended.
          @highlight = value
        when "ignore-case"
          @nocase = to_boolean(value)
        when "known-nontext-files"
          value.split.each do |ext|
            FileTester.set_nontext(ext)
          end
        when "known-text-files"
          value.split.each do |ext|
            FileTester.set_text(ext)
          end
        when "local-config-files"
          @local_config_files = to_boolean(value)
        when "line-number-color"
          @line_number_highlight = make_highlight(name, value)
        when "output"
          set_output_style(value)
        when "show-break"
          @show_break = to_boolean(value)
        when "quiet"
          @quiet = to_boolean(value)
        when "text-color"
          @text_highlights = [ make_highlight(name, value) ]
        when %r{^text\-color\-(\d+)$}
          @text_highlights[$1.to_i] = make_highlight(name, value)
        when "verbose"
          Log.verbose = @verbose = to_boolean(value) ? 1 : nil
        when "verbosity"
          Log.verbose = @verbose = value.to_i
        when "split-as-path"
          @split_as_path = to_boolean(value)
        when "size-limit"
          @size_limit = value.to_i
        when "basename", "name", "with-basename", "with-name"
          @with_basename = Regexp.create(value)
        when "fullname", "path", "with-fullname", "with-path"
          @with_fullname = Regexp.create(value)
        when "without-basename", "without-name"
          @without_basename = Regexp.create(value)
        when "without-fullname", "without-path"
          @without_fullname = Regexp.create(value)
        end
      end
    end
  end
  
  # Builds the highlight for option +opt+ from the color description +value+,
  # using the highlighter of the GlarkOptions instance. Without a highlighter
  # this only logs a message; with a highlighter but no value, it reports the
  # error and exits with status 2.
  def make_highlight(opt, value)
    maker = GlarkOptions.instance.highlighter
    return log("no highlighter defined") unless maker
    return maker.make(value) if value

    error opt + " requires a color"
    exit 2
  end

  # Returns true when the value, ignoring case, is "yes", "true" or "on";
  # false for anything else.
  def to_boolean(value)
    case value.downcase
    when "yes", "true", "on"
      true
    else
      false
    end
  end

  # Processes the options found in the GLARKOPTS environment variable, one
  # at a time. process_option receives the remaining options as well, so it
  # can consume the arguments of the current one.
  def read_environment_variable
    pending = Env.split("GLARKOPTS")
    loop do
      current = pending.shift
      break unless current
      process_option(current, pending)
    end
  end

  # Processes the command-line arguments in @args until an expression has
  # been created (or the arguments run out); whatever remains in @args
  # afterwards is left for the caller. Without an expression, a usage message
  # is written to standard error and the program exits with status 1.
  def read_options
    argc = @args.size
    
    # solitary "-v" means "--version", not --invert-match
    show_version if argc == 1 && @args[0] == "-v"
    
    @expr = nil
    
    until @expr
      arg = @args.shift
      break unless arg
      process_option(arg, @args)
    end

    return if @expr

    error "No expression provided." if argc > 0
      
    $stderr.puts "Usage: glark [options] expression file..."
    $stderr.puts "Try `glark --help' for more information."
    exit 1
  end

  def process_option(opt, args)
    log "processing option #{opt}"

    case opt
    when Regexp.new('^-0(\d{0,3})')
      log "got record separator"
      if $1.size.zero?
        $/ = "\n\n"
      else
        val = $1.oct
        begin
          $/ = $1.oct.chr
        rescue RangeError => e
          # out of range (e.g., 777) means nil:
          $/ = nil
        end
      end
      log "record separator set to #{$/}"

      # after (context)
    when "-A"
      @after = args.shift.to_i
    when %r{^--after-context(?:=(\d+))?}
      ac = $1 || args.shift
      @after = ac.to_i

      # before (context)
    when "-B"
      @before = args.shift.to_i
    when %r{^--before-context(?:=(\d+))?}
      bc = $1 || args.shift
      @before = bc.to_i

      # after (range)
    when "--after"
      @range_start = args.shift

    when %r{^--after(?:=(\d+%?))}
      @range_start = $1
      
      # before (range)
    when "-b", "--before"
      @range_end = args.shift
      
    when %r{^--before(?:=(\d+%?))}
      @range_end = $1
      
      # range
    when "-R", "--range"
      @range_start, @range_end = args.shift, args.shift

      # context
    when "-C"
      nxt = args.shift
      # keep it if it is a number, else use the default
      if nxt.index(%r{^\d+})
        @before = @after = nxt.to_i
      else
        @before = @after = 2
        args.unshift(nxt)
      end
      
    when %r{^--context(?:=(\d+))?}
      @after = @before = if $1 then $1.to_i else 2 end
      
    when %r{^-([1-9]\d*)$}
      @after = @before = $1.to_i
      log "@after = #{@after}; @before = #{@before}"

      # highlighting
    when "-u", %r{^--highlight(?:=(.+))?$}
      self.highlight = $1 || "multi"

    when "-U", "--no-highlight"
      @highlight = nil
      
      # version
    when "-V", "--version"
      show_version

      # verbose
    when %r{^--verbos(?:e|ity)(?:=(\d+))?}
      @verbose = $1 ? $1.to_i : 1
      Log.verbose = @verbose

    when "-v", "--invert-match"
      @invert_match = true
      @exit_status  = 0

    when "-i", "--ignore-case"
      @nocase = true

    when "--filter"
      @filter = true

    when %r{--no-?filter}
      @filter = false

    when "-g", "--grep"
      set_output_style("grep")

    when "-?", "--help"
      GlarkHelp.new
      exit 0

    when "--explain"
      @explain = true

      # line numbers
    when "-N", "--no-line-number"
      @show_line_numbers = false
    when "-n", "--line-number"
      @show_line_numbers = true
    when "--line-number-color"
      @line_number_highlight = make_highlight(opt, args.shift)

    when "-q", "-s", "--quiet", "--messages"
      @quiet = true
    when "-Q", "-S", "--no-quiet", "--no-messages"
      @quiet = false

    when "-m", %r{^--match-limit(?:=(\d+))?}
      ml = $1 || args.shift
      @match_limit = ml.to_i
      
    when "-w", "--word", "--word-regexp"
      @whole_words = true

    when "-x", "--line-regexp"
      @whole_lines = true
      
    when "-l", "--files-with-matches"
      @file_names_only = true
      @invert_match = false
    when  "-L", "--files-without-match"
      @file_names_only = true
      @invert_match = true

      # basename include/exclude matching
    when %r{^--(?:with-)?(?:basename|name)(?:=(.+))?$}
      arg = $1 || args.shift
      @with_basename = Regexp.create(arg.dup)

    when %r{^--without-(?:basename|name)(?:=(.+))?$}
      arg = $1 || args.shift
      @without_basename = Regexp.create(arg.dup)

      # fullname include/exclude matching
    when %r{^--(?:with-)?(?:fullname|path)(?:=(.+))?$}
      arg = $1 || args.shift
      @with_fullname = Regexp.create(arg.dup)

    when %r{^--without-(?:fullname|path)(?:=(.+))?$}
      arg = $1 || args.shift
      @without_fullname = Regexp.create(arg.dup)

      # colors
    when "-T", %r{^--text-color(?:=(.+))?$}
      thl = $1 || args.shift
      @text_highlights = [ make_highlight(opt, thl) ]

    when %r{^--text-color-(\d+)(?:=(.+))?$}
      idx = $1.to_i
      thl = $2 || args.shift
      @text_highlights[idx] = make_highlight(opt, thl)

    when "-F", %r{^--file-color(?:=(.+))?}
      fhl = $1 || args.shift
      @file_highlight = make_highlight(opt, fhl)

    when "-c", "--count"
      @count = true

    when "-Z", "--null"
      @write_null = true

    when "-M", "--exclude-matching"
      @exclude_matching = true
      
    when "-d"
      @directory = args.shift

    when %r{^--directories(?:=(\w+))?}
      @directory = $1

    when "-r", "--recurse"
      @directory = "recurse"

    when "-o", "-a"
      @expr = ExpressionFactory.new.make_expression(opt, args)
      return @expr               # we are done.

    when "-H", %r{^--with-?filenames?$}
      @show_file_names = true
      
    when "-h", %r{^--no-?filenames?$}
      @show_file_names = false
      
    when %r{^--binary-files?=\"?(\w+)\"?}
      @binary_files = $1

    when "-y", "--extract-matches"
      @extract_matches = true

    when %r{^--run(?:=(\w+))?}
      @run = $1 ? $1.to_i : 2
      
    when %r{^--output=(\w+)}
      set_output_style($1)
      
    when %r{ ^(\-(?:[1-9]\d*|\w)) (.+) }x
      # splits options: -13wo => (-13, -wo), -yf => (-y, -f)
      
      opt, rest = $1, "-" + $2
      args.unshift(rest)
      return process_option(opt, args)

    when "-f"
      @expr = ExpressionFactory.new.read_file(args.shift)

    when %r{^--file=(.+)$}
      @expr = ExpressionFactory.new.read_file($1)

    when "--no-split-as-path"
      @split_as_path = false

    when %r{^--split-as-path(?:=(.+))?$}
      value = $1 || "true"
      @split_as_path = to_boolean(value)

    when %r{^--label=(.+)}
      @label = $1

    when %r{^--size-limit=(\w+)}
      @size_limit = $1.to_i

    when "--conf"
      fields = {
        "after-context" => @after,
        "before-context" => @before,
        "binary-files" => @binary_files,
        "file-color" => @file_highlight,
        "filter" => @filter,
        "highlight" => @highlight,
        "ignore-case" => @nocase,
        "known-nontext-files" => FileTester.nontext_extensions.sort.join(' '),
        "known-text-files" => FileTester.text_extensions.sort.join(' '),
        "line-number-color" => @line_number_highlight,
        "local-config-files" => @local_config_files,
        "output" => @output,
        "quiet" => @quiet,
        "show-break" => @show_break,
        "size-limit" => @size_limit,
        "split-as-path" => @split_as_path,
        "text-color" => @text_highlights.join(' '),
        "verbose" => @verbose,
      }

      fields.keys.sort.each do |fname|
        puts  
        puts "#{fname}: #{fields[fname]}"
      end
      exit

    when "--dump"
      fields = {
        "after" => @after,
        "before" => @before,
        "binary_files" => @binary_files,
        "count" => @count,
        "directory" => @directory,
        "exclude_matching" => @exclude_matching,
        "explain" => @explain,
        "expr" => @expr,
        "extract_matches" => @extract_matches,
        "file_highlight" => @file_highlight ? @file_highlight.highlight("filename") : "filename",
        "file_names_only" => @file_names_only,
        "filter" => @filter,
        "highlight" => @highlight,
        "invert_match" => @invert_match,
        "known_nontext_files" => FileTester.nontext_extensions.join(", "),
        "known_text_files" => FileTester.text_extensions.join(", "),
        "label" => @label,
        "line_number_highlight" => @line_number_highlight ? @line_number_highlight.highlight("12345") : "12345",
        "local_config_files" => @local_config_files,
        "match_limit" => @match_limit,
        "nocase" => @nocase,
        "output" => @output,
        "quiet" => @quiet,
        "range_end" => @range_end,
        "range_start" => @range_start,
        "ruby version" => RUBY_VERSION,
        "show_break" => @show_break,
        "show_file_names" => @show_file_names,
        "show_line_numbers" => @show_line_numbers,
        "text_highlights" => @text_highlights.compact.collect { |hl| hl.highlight("text") }.join(", "),
        "verbose" => @verbose,
        "version" => $VERSION,
        "whole_lines" => @whole_lines,
        "whole_words" => @whole_words,
        "with-basename" => @with_basename,
        "without-basename" => @without_basename,
        "with-filename" => @with_filename,
        "without-filename" => @without_filename,
        "write_null" => @write_null,
      }

      len = fields.keys.collect { |f| f.length }.max
      
      fields.keys.sort.each do |field|
        printf "%*s : %s\n", len, field, fields[field]
      end
      exit 0

      # the expression
    else
      log "expression: #{@expr}"
      if @expr
        # we already have an expression
        args.unshift(opt)
        return @expr
      else
        known_end = false
        if opt == "--"
          log "end of options"
          opt = args.shift
          known_end = true
        else
          log "not an option: #{opt}"
        end

        if args
          @expr = ExpressionFactory.new.make_expression(opt, args, !known_end)
          return @expr          # we are done.
        end
      end
    end
    nil                         # we're not done.
  end

  # check options for collisions/data validity
  # At present the only check is on the range: when a start and an end are both
  # set, and both are percentages or both are not, the start must not be
  # greater than the end. A violation is reported through error, and the
  # program then exits with status 2. A range that mixes a percentage with a
  # non-percentage is not compared here.
  def validate
    return unless @range_start && @range_end

    percent_re = Regexp.new('([\.\d]+)%')
    start_pct  = percent_re.match(@range_start)
    end_pct    = percent_re.match(@range_end)

    # one is a percentage and the other is not: nothing to compare
    return if start_pct.nil? != end_pct.nil?

    if start_pct
      # both are percentages; compare the numeric parts
      return unless start_pct[1].to_f > end_pct[1].to_f
      error "range start (#{start_pct}) follows range end (#{end_pct})"
    else
      # neither is a percentage; compare them as integers
      return unless @range_start.to_i > @range_end.to_i
      error "range start (#{@range_start}) follows range end (#{@range_end})"
    end
    exit 2
  end

  # Writes the package name and version, then the author and license lines, to
  # standard output, and exits the program with status 0.
  def show_version
    [
      $PACKAGE + ", version " + $VERSION,
      "Written by Jeff Pace (jpace@incava.org).",
      "Released under the Lesser GNU Public License."
    ].each { |line| puts line }
    exit 0
  end
  
end

# Program entry point. Loads the options (GlarkOptions) from ARGV, prints the
# explanation of the expression if that was requested, and then searches each
# file with a Glark instance built from the options' expression. The file names
# are whatever is in ARGV once the options have been run, with each argument
# split on File::PATH_SEPARATOR when split_as_path is set; if ARGV is empty,
# the single name "-" is searched instead. The list is also stored in the
# global $files.
#
# Exits with the exit status held by the options. On a StandardError, prints
# "error: <message>" to stderr and exits with status 2 -- unless the options
# are verbose, in which case the exception is re-raised so that the stack trace
# is shown.
def main
  # Set before the begin block so that the rescue clause can tell whether the
  # failure happened before the options were available.
  opts = nil
  begin
    Log.set_widths(8, 5, -25, -25)

    Log.log "loading options"
    opts = GlarkOptions.instance
    opts.run(ARGV)
    Log.log "done loading options"

    # To get rid of the annoying stack trace on ctrl-C:
    trap("INT") { abort }
    
    if opts.explain
      puts opts.expr.explain
    end

    glark = Glark.new(opts.expr)
    $files = if ARGV.size > 0 then
               if opts.split_as_path
                 ARGV.collect { |f| f.split(File::PATH_SEPARATOR) }.flatten
               else
                 ARGV
               end
             else 
               [ '-' ]
             end
    
    $files.each do |f|
      glark.search(f) 
    end

    exit opts.exit_status
  rescue => e
    # show the message, and the stack trace only if verbose:
    $stderr.puts "error: #{e}"
    # opts is still nil if the options themselves could not be loaded; treat
    # that as non-verbose instead of failing again on nil.
    raise if opts && opts.verbose
    exit 2
  end
end

# Run the program only when this file is the script being executed, not when
# it is loaded by another script.
main if __FILE__ == $0

__END__
# prototype of forthcoming feature:

# multi-pass execution:
./glark --run=2 '/(\w+)\s*=\s*\d+/' *.c

# means run two passes: the first collects the matches, and the second searches for them:

first run:
    matches = Array.new
    GlarkOptions.matches = matches
    glark = Glark.new(ARGV, :write => false)
    glark.search($files)

second run:
    GlarkOptions.matches = nil
    expr = MultiOrExpression.new(matches)
    glark = Glark.new(ARGV, :expr => expr)
    glark.search($files)
