#!/usr/bin/env perl

use strict;
use warnings;

use Cwd qw(abs_path);
use Getopt::Long;
use JSON::MaybeXS;
use File::Find;
use File::Spec;
use Time::HiRes qw(time);
use Pod::Usage;

use App::Test::Generator::Mutator;

=pod

=head1 NAME

app-test-generator-mutate - Run mutation testing against a Perl test suite

=head1 SYNOPSIS

    app-test-generator-mutate [options]

    app-test-generator-mutate --lib lib --tests t
    app-test-generator-mutate --file lib/My/Module.pm
    app-test-generator-mutate --json mutation.json
    app-test-generator-mutate --min-score 75

=head1 QUICK START

    app-test-generator-mutate --lib lib --min-score 85 --json mutation.json --html mutation_html
    open mutation_html/index.html

=head2 Numeric Boundary Mutants

Kill Numeric Boundary Mutants first,
these are the easiest wins.
For example,
C<NUM_BOUNDARY_1295>
means something like C<if ($x > 10)> became C<if ($x >= 10)> or C<if ($x > 9)>.
If that survived, it means, there is a missing edge value.
Numeric mutations are important because they reveal missing edge coverage.
This example means line 1295.

So if that line contains something like this:

  if(((scalar keys %input) == 1) && exists($input{'type'}) && !ref($input{'type'})) {

You need to add a test where

=over 4

=item * %input contains more than one key

=item * One of them is type

=item * And behavior must be different

=back

For example, if you have a test with

  %input = ( type => 'string' )

add a test which sets

  %input = (
    type => 'string',
    something_else => 'value'
  )

=head2 Conditional Inversions

Then kill Conditional Inversions, for example, C<COND_INV_1186>,
where C<unless (-f $file)> became C<if (-f $file)>.
If that survives,
test did not assert the negative case.

Focus by file,
if one file contributes 200 survivors, that's the weakest module.

Frequently re-run.
The loop should be: add 5-10 targeted tests, re-run mutation tool, watch score climb, repeat.

=head1 DESCRIPTION

This command-line tool performs mutation testing on a Perl codebase.

It scans one or more C<.pm> files, generates code mutations using
L<App::Test::Generator::Mutator>, and runs the project's test suite
against each mutated version inside an isolated workspace.

For each generated mutant:

=over 4

=item *

The mutant is applied in a temporary workspace.

=item *

The mutated file is syntax-checked.

=item *

The test suite is executed using C<prove>.

=item *

If the tests fail, the mutant is considered I<killed>.

=item *

If the tests pass, the mutant is considered I<survived>.

=back

A mutation score is then calculated:

    (killed / total) * 100

Mutation testing measures the effectiveness of a test suite. A higher
mutation score indicates that the tests are better at detecting behavioral
changes in the code.

=head1 OPTIONS

=head2 --lib <dir>

Directory containing Perl modules to mutate.

Defaults to C<lib>.

=head2 --file <file>

Mutate a single file instead of scanning the entire C<--lib> directory.

=head2 --tests <dir>

Directory containing test files.

Defaults to C<t>.

=head2 --min-score <int>

Minimum acceptable mutation score (percentage).

If the final score is below this value, the program exits with a
non-zero status.

=head2 --json <file>

Write mutation results to the specified JSON file.

The output structure:

    {
        score    => "85.32",
        total    => 120,
        killed   => 102,
        survived => [ ... mutant IDs ... ]
    }

=head2 --cover_json <file>

The location of the file generatated by C<cover -report json>.
That file is used to generate an approximation for an LCSAJ table.

=head2 --fail-fast

(Reserved for future use.)

=head2 --mutation_level <full|fast>

Setting to C<fast> removes redundant mutations and dedups mutations before running.
The default is C<full>.

=head2 --timeout <seconds>

(Reserved for future use.)

=head2 --verbose

Print progress information.

=head2 --quiet

Suppress final summary output.

=head1 EXIT CODES

=over 4

=item = 0

Success and mutation score meets minimum threshold.

=item = 1

Mutation score below C<--min-score>.

=item = 2

Baseline test suite failed before mutation testing began.

=item = 3

Invalid command-line options.

=back

=head1 WORKFLOW

The tool performs the following steps:

=over 4

=item 1.

Collect target files (either a single file or all C<.pm> files under C<--lib>).

=item 2.

Run baseline tests to ensure the suite passes before mutation.

=item 3.

Generate mutants for each file.

=item 4.

Apply each mutant in isolation and re-run the test suite.

=item 5.

Calculate and report mutation statistics.

=back

=encoding utf-8

=head1 WORKFLOW DIAGRAM

The mutation testing process follows this execution flow:

    ┌───────────────────────────────┐
    │ Start                         │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Collect Target Files          │
    │  --file OR scan --lib/*.pm    │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Run Baseline Tests            │
    │ prove -l t                    │
    └───────────────┬───────────────┘
                    │
         Baseline OK? ── No ──► Exit (code 2)
                    │
                   Yes
                    │
                    ▼
    ┌───────────────────────────────┐
    │ For Each File                │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Generate Mutants             │
    │ (conditional flips, etc.)    │
    └───────────────┬───────────────┘
                    │
                    ▼
        ┌───────────────────────────────┐
        │ For Each Mutant              │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Prepare Workspace            │
        │ (isolated temp directory)    │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Apply Mutant                 │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Syntax Check                 │
        │ perl -c mutated_file.pm      │
        └───────────────┬───────────────┘
                        │
              Compiles? ── No ──► Skip Mutant
                        │
                       Yes
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Run Test Suite               │
        │ prove t                      │
        └───────────────┬───────────────┘
                        │
          Tests Fail? ── Yes ──► Killed++
                        │
                       No
                        │
                        ▼
                   Survived++
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Repeat for Next Mutant       │
        └───────────────┬───────────────┘
                        │
                        ▼
    ┌───────────────────────────────┐
    │ Calculate Mutation Score      │
    │ (killed / total) * 100        │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Print Report / Write JSON     │
    └───────────────┬───────────────┘
                    │
                    ▼
                 Finish

=cut

my %opt = (
	lib => 'lib',
	tests => 't',
	min_score => 0,
	fail_fast => 0,
	verbose => 0,
	quiet => 0,
	man	=> 0,
	help	=> 0,
	html => 0,
	mutation_level => 'full',
	lcsaj_root  => 'lcsaj',
	lcsaj_hits => 'cover_html/lcsaj_hits.json',
);

GetOptions(
	'lib=s' => \$opt{lib},
	'file=s' => \$opt{file},
	'tests=s' => \$opt{tests},
	'min-score=i' => \$opt{min_score},
	'cover_json=s' => \$opt{cover_json},
	'json=s' => \$opt{json},
	'fail-fast' => \$opt{fail_fast},
	'timeout=i' => \$opt{timeout},
	'verbose' => \$opt{verbose},
	'quiet' => \$opt{quiet},
	'help|h' => \$opt{help},
	'html=s'	=> \$opt{html},
	'mutation_level=s'	=> \$opt{mutation_level},
	'man|m' => \$opt{man},
	'lcsaj_root=s'  => \$opt{lcsaj_root},
	'lcsaj_hits=s' => \$opt{lcsaj_hits},
) or pod2usage(3);

pod2usage(-exitval => 0, -verbose => 1) if $opt{help};
pod2usage(-exitval => 0, -verbose => 2) if $opt{man};

# -------------------------
# Collect Files
# -------------------------

my @files;

if ($opt{file}) {
	push @files, $opt{file};
} else {
	find(
		sub {
			push @files, $File::Find::name if /\.pm$/;
		},
		$opt{lib}
	);
}

# Pass target files to LCSAJ runtime debugger
$ENV{LCSAJ_TARGETS} = join(':', map { abs_path($_) } @files);

# -------------------------
# Verify baseline tests
# -------------------------

print "Running baseline tests...\n" if $opt{verbose};

if (system("prove -Mblib -l $opt{tests}") != 0) {
	print STDERR "Baseline tests failed.\n";
	exit 2;
}

# -------------------------
# Run Mutation Testing
# -------------------------

my $total = 0;
my $killed = 0;
my @survivors;
my @killed_mutants;

for my $file (@files) {
	my $mutator = App::Test::Generator::Mutator->new(
		file => $file,
		lib_dir => $opt{lib},
		mutation_level => $opt{mutation_level},
	);

	# ------------------------------------------------------------
	# Group mutants by mutation site
	# ------------------------------------------------------------

	my @mutants = $mutator->generate_mutants();

	my $groups = {};

	if ($opt{mutation_level} eq 'fast') {
		$groups = group_mutants($file, \@mutants);
	} else {
		$groups = { map { $_->id => [$_] } @mutants };
	}

	for my $group (values %$groups) {
		my $mutant = representative_mutant($group);

		print 'Testing representative mutant ',
			$mutant->id,
			" ($file line ",
			$mutant->line,
			') representing ',
			scalar(@$group),
			" mutants\n" if $opt{verbose};

		# Now run mutation test using only this representative
		my $workspace = $mutator->prepare_workspace();

		$mutator->apply_mutant($mutant);

		# The workspace contains a full copy of lib/ with just the mutant applied.
		# We point PERL5LIB at it so prove picks up the mutated module instead of
		# the original, without -Mblib overriding us by loading the project's own
		# blib/ first.
		local $ENV{PERL5LIB} = File::Spec->catfile($workspace, $opt{lib});

		my $compile = system($^X, '-c', File::Spec->catfile($workspace, $file));
		next if $compile != 0;

		# warn "WORKSPACE: $workspace\n";
		# warn "WORKSPACE lib contents: " . join(', ', glob("$workspace/lib/*")) . "\n";
		# warn "PERL5LIB: $ENV{PERL5LIB}\n";

		# Clear PERL5OPT to prevent any -Ilib or other flags from overriding
		# the workspace PERL5LIB. If PERL5OPT contains -Ilib, prove will load
		# modules from the project's own lib/ directory instead of the mutated
		# workspace, causing all mutants to survive regardless of the mutation.
		local $ENV{PERL5OPT} = '';

		my $survived = (system('prove', $opt{tests}) == 0);

		# --------------------------------------------------
		# If tests pass → mutant survived
		# --------------------------------------------------
		if ($survived) {
			# representative survived
			# mark entire group as survived

			for my $m (@$group) {
				my $difficulty = mutation_difficulty($m);

				push @survivors, {
					id          => $m->id(),
					line        => $m->line(),
					file        => $file,
					description => $m->description(),
					status      => 'Survived',
					difficulty  => $difficulty->{label},
					priority    => $difficulty->{score},
					hint        => $difficulty->{hint},
				};

				$total++;
			}
		} else {
			# representative killed
			# assume whole group killed

			for my $m (@$group) {
				push @killed_mutants, {
					id => $m->id(),
					line => $m->line(),
					file => $file,
					description => $m->description(),
					status => 'Killed'
				};

				$total++;
				$killed++;
			}
		}

		# workspace auto-destroyed when scope ends
	}
}

# -------------------------
# Report
# -------------------------

my $score = $total ? sprintf('%.2f', ($killed / $total) * 100) : 100;

unless ($opt{quiet}) {
	print "\nMutation Score: $score%\n";
	print "Total: $total\n";
	print "Killed: $killed\n";
	print 'Survived: ', scalar(@survivors), "\n";
}

if ($opt{json}) {
	# Sort Survivors by Priority
	@survivors = sort { $b->{priority} <=> $a->{priority} } @survivors;

	open my $fh, '>', $opt{json} or die $!;
	print $fh encode_json({
		score => $score,
		total => $total,
		killed_count => $killed,
		survived_count => scalar(@survivors),
		killed => \@killed_mutants,
		survived => \@survivors,
	});

	close $fh;
}

if ($opt{html}) {
	if(!$opt{json}) {
		die '--html needs --json';
	}
	require App::Test::Generator::Report::HTML;
	App::Test::Generator::Report::HTML->import();

	App::Test::Generator::Report::HTML->generate(
		$opt{json},             # mutation results
		$opt{html},             # output directory
		$opt{cover_json},       # optional Devel::Cover JSON
		$opt{lcsaj_root},                # directory containing *.lcsaj.json files
		$opt{lcsaj_hits},  # runtime LCSAJ hit data
	);
}

if($score < $opt{min_score}) {
	print STDERR "score $score is less than min_score $opt{min_score}\n";
	exit 1;
}
exit 0;

# ------------------------------------------------------------
# group_mutants
#
# Groups mutants that affect the same location and operator.
#
# Example:
#
#   line 42:  >  → >=
#   line 42:  >  → <
#
# These belong to the same mutation "site".
#
# The function returns a structure like:
#
#   {
#      "file:line:operator" => [ mutant1, mutant2, mutant3 ]
#   }
#
# This allows us to treat them as a logical unit.
# ------------------------------------------------------------

sub group_mutants {
	my ($file, $mutants) = @_;

	my %groups;

	for my $m (@$mutants) {
		my $line = $m->line;

		# original token mutated (>, ==, !, etc)
		my $orig = defined $m->original ? $m->original : '';

		# group key identifies a mutation site
		$orig =~ s/\s+//g;
		my $key = join(':', $file, $line, $orig);

		push @{ $groups{$key} }, $m;
	}

	return \%groups;
}

# ------------------------------------------------------------
# representative_mutant
#
# Selects a single mutant from a group to represent
# the mutation site.
#
# Strategy:
#   - prefer boundary mutations
#   - otherwise use the first mutant
#
# This improves mutation efficiency by avoiding
# redundant test runs.
# ------------------------------------------------------------

sub representative_mutant {
	my $group = $_[0];

	for my $m (@$group) {
		# Prefer boundary-type mutations if available
		if($m->id() && $m->id() =~ /BOUNDARY|NUM/) {
			return $m;
		}
	}

	# fallback: first mutant
	return $group->[0];
}

# ------------------------------------------------------------
# mutation_difficulty
#
# Assigns a difficulty ranking to a mutant.
#
# Difficulty reflects how valuable killing the mutant would be.
# Higher difficulty means the missing test likely reveals
# a meaningful behavioral gap.
# ------------------------------------------------------------

sub mutation_difficulty {
	my $mutant = $_[0];

	my $id = $mutant->id // '';

	# Numeric boundary mutants expose missing edge cases
	return {
		score => 3,
		label => 'HIGH',
		hint => 'Likely missing edge-case test (boundary value)'
	} if $id =~ /BOUNDARY|NUM/;

	# Conditional inversions reveal missing negative assertions
	return {
		score => 2,
		label => 'MEDIUM',
		hint => 'Add tests asserting both true and false outcomes'
	} if $id =~ /COND|INV|NEG/;

    # Logical operator changes
    return {
        score => 2,
        label => 'MEDIUM',
	hint => 'Test combinations where only one logical operand is true'
    } if $id =~ /LOGIC|AND|OR/;

    # Default: minor mutation
    return {
        score => 1,
        label => 'LOW',
        hint  => 'Mutation survived but impact may be minor'
    };
}

=head1 AUTHOR

Nigel Horne

=cut
