Article 6614 of comp.lang.perl:
Xref: feenix.metronet.com comp.lang.perl:6614
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!agate!ames!purdue!not-for-mail
From: spaf@cs.purdue.edu (Gene Spafford)
Newsgroups: comp.lang.perl
Subject: Re: Perl .newsrc cleanup script?
Date: 10 Oct 1993 10:47:30 -0500
Organization: Department of Computer Sciences, Purdue University
Lines: 175
Message-ID: <299aqiINNe4o@uther.cs.purdue.edu>
References: <2918j2$7r0@snoopy.cis.ufl.edu>
NNTP-Posting-Host: uther.cs.purdue.edu
In-reply-to: spp@dragonfly.cis.ufl.edu's message of 7 Oct 1993 14:20:18 GMT

The following doesn't do exactly what Steve requested, and it is
*realy* ugly and it has localisms for our news and my preference for
newsgroup order, but....it works.

This takes as argument the .newsrc file and the news/lib/active file.
It updates all the range lists in newsgroup lines, and sorts them
according to a prespecified order (see the pattern list).  Then it
writes out a new copy with ! lines at the end.

Happy hacking.

#!/usr/local/perl/perl -s

@Patterns = ( 'announce', 'general', '^[^.]+$', '^serc\.', 
	'^rec\.humor\.funny', 'comp\.risks', '^purdue\.',
	'^in\.', '^ieee\.', '^gnu\.', '^news\.', '^comp\.', '^sci\.', 
	'^misc\.', '^rec\.', '^soc\.', '^alt\.', '^talk\.');
	

die "usage: $0 newsrc-file [active-file]\n" unless ($Fname = shift);
open(NRC, "<$Fname") || die "Cannot open $Fname: $!";
(rename ($Fname, "$Fname.old") || die "Cannot rename $Fname: $!")
	unless $debug;
$, = $\ = "\n";


@slurp = (<NRC>);
close NRC;
chop @slurp;
warn (scalar(@slurp), " newsgroup lines found in $Fname\n") if $debug;

if ($active = shift) {
    open(ACTIVE, "<$active") || die "Cannot open active file $active: $!";

    while (<ACTIVE>) {
	split;
	$Low{@_[0]} = @_[2]+0;
	$High{@_[0]} = @_[1]+0;
    }
    close ACTIVE;
    
    $index = 0; until ($index > $#slurp) {
	($group) = $slurp[$index] =~ m/^(\S+)[:!]/;
	next if defined $Low{$group};
        warn "Deleting bogus group: $group\n";
        splice (@slurp, $index, 1) && redo;
    } continue {++$index;}

    for ($_ = 0; $_ <= $#slurp; ++$_) {
        ($group, $sep, $numbs) = 
		$slurp[$_] =~ m/^(\S+)([:!])\s*(\d+.*)$/;
	next unless $numbs =~ m/\S/;
	warn "Group $group has read $numbs\n" if $debug;
	$numbs = &CheckRead($group, $numbs);
	warn "New range is $numbs\n\n" if $debug;
	$slurp[$_] = "$group$sep $numbs";
    }
}

foreach $_ (sort {$a cmp $b;} @slurp) {
    push (@On, $_) if m/:/;
    push (@Off, $_) unless m/:/;
    warn "Don't know what to do about: $_\n" unless m/[:!]/;
}

open(NEW, ">$Fname") || die "Cannot open $Fname: $!" unless $debug;
open(NEW, "|cat") if $debug;

foreach $pattern (@Patterns) {
eval <<"EOF" ;
    \$index = $[;
    until (\$index > \$#On) {
	next unless \$On[\$index] =~ m/$pattern/o;
	print NEW splice(\@On, \$index, 1, ());
	redo;
       } continue {++\$index;}
EOF
warn $@ if $@;
}

print NEW @On, @Off;
close NEW;
exit;


sub CheckRead {
    local($group, $numbers) = @_;
    local($low, $high, $result) = ($Low{$group}, $High{$group}, '');
    local(@bool, $start, $flag, $next, $first, $index);

    warn "active file has $low to $high for $group\n"
	if $debug;

# Simplest case -- all articles expired (empty group)
    return "1-$high" if $high < $low;

# Next simplest case -- I'm already caught up
    $numbers =~ m/^\s+((\d+)-(\d+))$/ && $2 == 1 && $3 == $high && return $1;

# We construct a boolean array indicating what has been seen
    foreach $index (split(/,/, $numbers)) {
        if ($index =~ m/(\d+)-(\d+)/) {
	    next if $2 < $low;
	    $first = $1 < $low ? $low : $1;
	    foreach $_ ($first .. $2) {$bool[$_] = 1;}
        } else {
	    $bool[$index]++ unless $index < $low;
	}
    }
    warn "max article read is $#bool\n" if $debug;

# If the most recently-read article has expired, the case is simple
    if ($#bool < $low) {
	$low--;
	return "1-$low";
    }
# If the group has been here awhile, we can reduce the array
    elsif ($low > 1) {
        $start = $low-1;
	@bool = @bool[$start .. $high];
        $flag = 1;
        $bool[0] = 0;
    }

# Now step through for each set of values
    while (($next = &NextVal(1)) != -1) {
	$start += $next;
	$next = &NextVal(0);
	if ($next < 0) {
	    $result .= ",$start-$high" unless $flag;
	    last;
	}
        elsif ($next == 0) {
	    if ($flag) {
	        $result = $result . ",1-" .
		    (($start == $low) ? "$start" : $low-1 . ",$start");
                $flag = 0;
	    } else {
   	        $result .= ",$start";
	    }
	    $start += 2;
	}
	else {
	    $next += $start;
	    $start = 1 if $flag;
	    $flag = 0;
	    $result .= ",${start}-$next";
  	    $start = $next + 2;
	}
    }

    return "1-$high" unless $result =~ m/\S/;
    substr($result, 1);  # chop the leading ,
}

sub NextVal {
    local($val) = @_;
    local($count, $tmp) = 0;
    local($arrsize) = $#bool;

    while ($count <= $arrsize) {
	$tmp = shift @bool;
	return $count if ($tmp && $val);
	return $count unless ($tmp || $val);
	$count++;
    }

    -1;
}

-- 
Gene Spafford, COAST Project Director
Software Engineering Research Center & Dept. of Computer Sciences
Purdue University, W. Lafayette IN 47907-1398
Internet:  spaf@cs.purdue.edu	phone:  (317) 494-7825


