From 244297cedc527fcab7ce98d1770ac43592b6c46e Mon Sep 17 00:00:00 2001 From: Joseph Carter Date: Tue, 4 Jan 2000 03:26:19 +0000 Subject: [PATCH] Added cvs2cl.pl to tools, added a changelog target to the makefile, and removed the ChangeLog. --- .gitignore | 1 + AUTHORS | 1 + Makefile.in | 11 +- qw_client/r_main.c | 2 +- tools/cvs2cl/COPYING | 340 +++++++++ tools/cvs2cl/README | 3 + tools/cvs2cl/TODO | 17 + tools/cvs2cl/cvs2cl.pl | 1480 ++++++++++++++++++++++++++++++++++++++++ tools/cvs2cl/mywrap.pl | 84 +++ 9 files changed, 1936 insertions(+), 3 deletions(-) create mode 100644 tools/cvs2cl/COPYING create mode 100644 tools/cvs2cl/README create mode 100644 tools/cvs2cl/TODO create mode 100755 tools/cvs2cl/cvs2cl.pl create mode 100755 tools/cvs2cl/mywrap.pl diff --git a/.gitignore b/.gitignore index c98a572..18e7c77 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ config.cache config.log config.status build-stamp +ChangeLog diff --git a/AUTHORS b/AUTHORS index 80cd33c..cdc7774 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,6 +19,7 @@ QW/Q1 tree merging: Eric Windisch Joseph Carter Peter Andreasen + Maas van den Berg Autoconf support and portability isues: Loring Holden diff --git a/Makefile.in b/Makefile.in index fb71ee5..74b026b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -3,6 +3,8 @@ SRC_DIR = @srcdir@ DESTDIR = +ANONCVS=:pserver:anonymous@cvs.quake.sourceforge.net:/cvsroot/quake + all: @for dir in $(SUBDIRS); do \ $(MAKE) -C $$dir $@ || exit; \ @@ -24,8 +26,12 @@ clean-autoconf mrproper: distclean %: @for dir in $(SUBDIRS); do \ $(MAKE) -C $$dir $@ || exit; \ - done - + + +changelog: + @echo "CVS will ask for a passwd, press enter" + cvs -d$(ANONCVS) login + ./tools/cvs2cl/cvs2cl.pl -g -d$(ANONCVS) # Code to automatically re-configure, only runs if you are compiling in the # source directory @@ -46,3 +52,4 @@ qw_server/Makefile: qw_server/Makefile.in configure uquake/Makefile: uquake/Makefile.in configure ./configure endif + diff --git a/qw_client/r_main.c b/qw_client/r_main.c index 3f59588..9abe4d2 100644 --- a/qw_client/r_main.c +++ b/qw_client/r_main.c @@ -1095,7 +1095,7 @@ void R_InitTurb (void) { int i; - for (i=0 ; i<1280 ; i++) + for (i=0 ; i + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/tools/cvs2cl/README b/tools/cvs2cl/README new file mode 100644 index 0000000..d91c6c0 --- /dev/null +++ b/tools/cvs2cl/README @@ -0,0 +1,3 @@ +Run "./cvs2cl.pl --help" to see what this is all about. + +Oh, and ignore a-subdir and b-subdir. They're just test data. diff --git a/tools/cvs2cl/TODO b/tools/cvs2cl/TODO new file mode 100644 index 0000000..dfc42c8 --- /dev/null +++ b/tools/cvs2cl/TODO @@ -0,0 +1,17 @@ +Oh Most High and Puissant Emacs, pleased to be in -*- outline -*- mode! + +This is my list of pending changes. I really, really will get to +these soon. + +Each entry is marked with an asterisk at the beginning of the line. +Type C-c C-t to collapse all entries, C-c C-a to show them. + +* Cumulative behavior? +Would be great if cvs2cl could read an existing ChangeLog and then +only get the amount of information required to bring it up to date. +I.e., it would find the front date in the ChangeLog and use that in a +"cvs log -d" invocation. Heh. Easy to do. So do it. + +* Modify CVS to expand usernames based on CVSROOT/users +So that the -U option isn't needed. + diff --git a/tools/cvs2cl/cvs2cl.pl b/tools/cvs2cl/cvs2cl.pl new file mode 100755 index 0000000..878c650 --- /dev/null +++ b/tools/cvs2cl/cvs2cl.pl @@ -0,0 +1,1480 @@ +#!/bin/sh +exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*- +#!perl -w + +############################################################## +### ### +### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ### +### ### +############################################################## + +## $Revision$ +## $Date$ +## $Author$ +## +## (C) 1999 Karl Fogel , under the GNU GPL. +## +## (Extensively hacked on by Melissa O'Neill .) +## +## cvs2cl.pl is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## cvs2cl.pl is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You may have received a copy of the GNU General Public License +## along with cvs2cl.pl; see the file COPYING. If not, write to the +## Free Software Foundation, Inc., 59 Temple Place - Suite 330, +## Boston, MA 02111-1307, USA. + + + +use strict; +use Text::Wrap; +use Time::Local; +use File::Basename; + + +# The Plan: +# +# Read in the logs for multiple files, spit out a nice ChangeLog that +# mirrors the information entered during `cvs commit'. +# +# The problem presents some challenges. In an ideal world, we could +# detect files with the same author, log message, and checkin time -- +# each would be a changelog entry. +# We'd sort them; and spit them out. Unfortunately, CVS is *not atomic* +# so checkins can span a range of times. Also, the directory structure +# could be hierarchical. +# +# Another question is whether we really want to have the ChangeLog +# exactly reflect commits. An author could issue two related commits, +# with different log entries, reflecting a single logical change to the +# source. GNU style ChangeLogs group these under a single author/date. +# We try to do the same. +# +# So, we parse the output of `cvs log', storing log messages in a +# multilevel hash that stores the mapping: +# directory => author => time => message => filelist +# As we go, we notice "nearby" commit times and store them together +# (i.e., under the same timestamp), so they appear in the same log +# entry. +# +# When we've read all the logs, we twist this mapping into +# a time => author => message => filelist mapping for each directory. +# +# If we're not using the `--distributed' flag, the directory is always +# considered to be `./', even as descend into subdirectories. + + +############### Globals ################ + + +# What we run to generate it: +my $Log_Source_Command = "cvs log"; + +# In case we have to print it out: +my $VERSION = '$Revision$'; +$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/; + +## Vars set by options: + +# Print debugging messages? +my $Debug = 0; + +# Just show version and exit? +my $Print_Version = 0; + +# Just print usage message and exit? +my $Print_Usage = 0; + +# Single top-level ChangeLog, or one per subdirectory? +my $Distributed = 0; + +# What file should we generate (defaults to "ChangeLog")? +my $Log_File_Name = "ChangeLog"; + +# Expand usernames to email addresses based on a map file? +my $User_Map_File = ""; + +# Output to a file or to stdout? +my $Output_To_Stdout = 0; + +# Eliminate empty log messages? +my $Prune_Empty_Msgs = 0; + +# Separates header from log message +my $After_Header = " "; + +# Format more for programs than for humans. +my $XML_Output = 0; + +# Show times in UTC instead of local time +my $UTC_Times = 0; + +# Show day of week in output? +my $Show_Day_Of_Week = 0; + +# Show revision numbers in output? +my $Show_Revisions = 0; + +# Show tags (symbolic names) in output? +my $Show_Tags = 0; + +# Show branches by symbolic name in output? +my $Show_Branches = 0; + +# Show only revisions on these branches or their ancestors. +my @Follow_Branches; + +# Don't bother with files matching this regexp. +my @Ignore_Files; + +# How exactly we match entries. We definitely want "o", +# and user might add "i" by using --case-insensitive option. +my $Case_Insensitive = 0; + +# Maybe only show log messages matching a certain regular expression. +my $Regexp_Gate = ""; + +# Pass this global option string along to cvs, to the left of `log': +my $Global_Opts = ""; + +# Pass this option string along to the cvs log subcommand: +my $Command_Opts = ""; + +# Read log output from stdin instead of invoking cvs log? +my $Input_From_Stdin = 0; + +# Max checkin duration. CVS checkin is not atomic, so we may have checkin +# times that span a range of time. We assume that checkins will last no +# longer than $Max_Checkin_Duration seconds, and that similarly, no +# checkins will happen from the same users with the same message less +# than $Max_Checkin_Duration seconds apart. +my $Max_Checkin_Duration = 180; + +# What to put at the front of [each] ChangeLog. +my $ChangeLog_Header = ""; + +## end vars set by options. + +# In 'cvs log' output, one long unbroken line of equal signs separates +# files: +my $file_separator = "=======================================" + . "======================================"; + +# In 'cvs log' output, a shorter line of dashes separates log messages +# within a file: +my $logmsg_separator = "----------------------------"; + + +############### End globals ############ + + + + +&parse_options (); +&derive_change_log (); + + + +### Everything below is subroutine definitions. ### + +# Fills up a ChangeLog structure in the current directory. +sub derive_change_log () +{ + # See "The Plan" above for a full explanation. + + my %grand_poobah; + + my $file_full_path; + my $time; + my $revision; + my $author; + my $msg_txt; + my $detected_file_separator; + + # We might be expanding usernames + my %usermap; + + # In general, it's probably not very maintainable to use state + # variables like this to tell the loop what it's doing at any given + # moment, but this is only the first one, and if we never have more + # than a few of these, it's okay. + my $collecting_symbolic_names = 0; + my %symbolic_names; # Where tag names get stored. + my %branch_names; # We'll grab branch names while we're at it. + my %branch_numbers; # Save some revisions for @Follow_Branches + my @branch_roots; # For showing which files are branch ancestors. + + + if (! $Input_From_Stdin) { + open (LOG_SOURCE, "$Log_Source_Command |") + or die "unable to run \"${Log_Source_Command}\""; + } + else { + open (LOG_SOURCE, "-") or die "unable to open stdin for reading"; + } + + %usermap = &maybe_read_user_map_file (); + + while () + { + # If on a new file and don't see filename, skip until we find it, and + # when we find it, grab it. + if ((! (defined $file_full_path)) and /^Working file: (.*)/) { + $file_full_path = $1; + if (@Ignore_Files) { + my $base; + ($base, undef, undef) = fileparse ($file_full_path); + # Ouch, I wish trailing operators in regexps could be + # evaluated on the fly! + if ($Case_Insensitive) { + if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) { + undef $file_full_path; + } + } + elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) { + undef $file_full_path; + } + } + next; + } + + # Just spin wheels if no file defined yet. + next if (! $file_full_path); + + # Collect tag names in case we're asked to print them in the output. + if (/^symbolic names:$/) { + $collecting_symbolic_names = 1; + next; # There's no more info on this line, so skip to next + } + if ($collecting_symbolic_names) + { + # All tag names are listed with whitespace in front in cvs log + # output; so if see non-whitespace, then we're done collecting. + if (/^\S/) { + $collecting_symbolic_names = 0; + } + else # we're looking at a tag name, so parse & store it + { + # According to the Cederqvist manual, in node "Tags", tag + # names must start with an uppercase or lowercase letter and + # can contain uppercase and lowercase letters, digits, `-', + # and `_'. However, it's not our place to enforce that, so + # we'll allow anything CVS hands us to be a tag: + /^\s([^:]+): ([\d.]+)$/; + my $tag_name = $1; + my $tag_rev = $2; + + # You can always tell a branch by the ".0." as the + # second-to-last digit in the revision number. + if ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) { + my $real_branch_rev = $1 . $3; + $branch_names{$real_branch_rev} = $tag_name; + if (@Follow_Branches) { + if (grep ($_ eq $tag_name, @Follow_Branches)) { + $branch_numbers{$tag_name} = $real_branch_rev; + } + } + } + else { + # Else it's just a regular (non-branch) tag. + push (@{$symbolic_names{$tag_rev}}, $tag_name); + } + } + } + # End of code for collecting tag names. + + # If have file name, but not revision, and see revision, then grab + # it. (We collect unconditionally, even though we may or may not + # ever use it.) + if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/)) + { + $revision = $1; + + if (@Follow_Branches) + { + foreach my $branch (@Follow_Branches) + { + # Special case for following trunk revisions + if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/)) + { + goto dengo; + } + + my $branch_number = $branch_numbers{$branch}; + if ($branch_number) + { + # Are we on one of the follow branches or an ancestor of + # same? + # + # If this revision is a prefix of the branch number, or + # possibly is less in the minormost number, OR if this + # branch number is a prefix of the revision, then yes. + # Otherwise, no. + # + # So below, we determine if any of those conditions are + # met. + + # Trivial case: is this revision on the branch? + # (Compare this way to avoid regexps that screw up Emacs + # indentation, argh.) + if ((substr ($revision, 0, ((length ($branch_number)) + 1))) + eq ($branch_number . ".")) + { + goto dengo; + } + # Non-trivial case: check if rev is ancestral to branch + elsif ((length ($branch_number)) > (length ($revision))) + { + $revision =~ /^([\d\.]+)(\d+)$/; + my $r_left = $1; # still has the trailing "." + my $r_end = $2; + + $branch_number =~ /^([\d\.]+)(\d+)\.\d+$/; + my $b_left = $1; # still has trailing "." + my $b_mid = $2; # has no trailing "." + + if (($r_left eq $b_left) + && ($r_end <= $b_mid)) + { + goto dengo; + } + } + } + } + } + else # (! @Follow_Branches) + { + next; + } + + # Else we are following branches, but this revision isn't on the + # path. So skip it. + undef $revision; + dengo: + next; + } + + # If we don't have a revision right now, we couldn't possibly + # be looking at anything useful. + if (! (defined ($revision))) { + $detected_file_separator = /^$file_separator$/o; + if ($detected_file_separator) { + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + goto CLEAR; + } + else { + next; + } + } + + # If have file name but not date and author, and see date or + # author, then grab them: + unless (defined $time) { + if (/^date: .*/) + { + ($time, $author) = &parse_date_and_author ($_); + if (defined ($usermap{$author}) and $usermap{$author}) { + $author = $usermap{$author}; + } + } + else { + $detected_file_separator = /^$file_separator$/o; + if ($detected_file_separator) { + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + goto CLEAR; + } + } + # If the date/time/author hasn't been found yet, we couldn't + # possibly care about anything we see. So skip: + next; + } + + # A "branches: ..." line here indicates that one or more branches + # are rooted at this revision. If we're showing branches, then we + # want to show that fact as well, so we collect all the branches + # that this is the latest ancestor of and store them in + # @branch_roots. Just for reference, the format of the line we're + # seeing at this point is: + # + # branches: 1.5.2; 1.5.4; ...; + # + # Okay, here goes: + + if (/^branches:\s+(.*);$/) + { + if ($Show_Branches) + { + my $lst = $1; + $lst =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1 + if ($lst) { + @branch_roots = split (/;\s+/, $lst); + } + else { + undef @branch_roots; + } + next; + } + else + { + # Ugh. This really bothers me. Suppose we see a log entry + # like this: + # + # ---------------------------- + # revision 1.1 + # date: 1999/10/17 03:07:38; author: jrandom; state: Exp; + # branches: 1.1.2; + # Intended first line of log message begins here. + # ---------------------------- + # + # The question is, how we can tell the difference between that + # log message and a *two*-line log message whose first line is + # + # "branches: 1.1.2;" + # + # See the problem? The output of "cvs log" is inherently + # ambiguous. + # + # For now, we punt: we liberally assume that people don't + # write log messages like that, and just toss a "branches:" + # line if we see it but are not showing branches. I hope no + # one ever loses real log data because of this. + next; + } + } + + # If have file name, time, and author, then we're just grabbing + # log message texts: + $detected_file_separator = /^$file_separator$/o; + if ($detected_file_separator && ! (defined $revision)) { + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + goto CLEAR; + } + unless ($detected_file_separator || /^$logmsg_separator$/o) + { + $msg_txt .= $_; # Normally, just accumulate the message... + next; + } + # ... until a msg separator is encountered: + # Ensure the message contains something: + if ((! $msg_txt) + || ($msg_txt =~ /^\s*\.\s*$|^\s*$/) + || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) { + if ($Prune_Empty_Msgs) { + goto CLEAR; + } + # else + $msg_txt = "[no log message]\n"; + } + + ### Store it all in the Grand Poobah: + { + my $dir_key; # key into %grand_poobah + my %qunk; # complicated little jobbie, see below + + # Each revision of a file has a little data structure (a `qunk') + # associated with it. That data structure holds not only the + # file's name, but any additional information about the file + # that might be needed in the output, such as the revision + # number, tags, branches, etc. The reason to have these things + # arranged in a data structure, instead of just appending them + # textually to the file's name, is that we may want to do a + # little rearranging later as we write the output. For example, + # all the files on a given tag/branch will go together, followed + # by the tag in parentheses (so trunk or otherwise non-tagged + # files would go at the end of the file list for a given log + # message). This rearrangement is a lot easier to do if we + # don't have to reparse the text. + # + # A qunk looks like this: + # + # { + # filename => "hello.c", + # revision => "1.4.3.2", + # time => a timegm() return value (moment of commit) + # tags => [ "tag1", "tag2", ... ], + # branch => "branchname" # There should be only one, right? + # branchroots => [ "branchtag1", "branchtag2", ... ] + # } + + if ($Distributed) { + # Just the basename, don't include the path. + ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path); + } + else { + $dir_key = "./"; + $qunk{'filename'} = $file_full_path; + } + + # This may someday be used in a more sophisticated calculation + # of what other files are involved in this commit. For now, we + # don't use it, because the common-commit-detection algorithm is + # hypothesized to be "good enough" as it stands. + $qunk{'time'} = $time; + + # We might be including revision numbers and/or tags and/or + # branch names in the output. Most of the code from here to + # loop-end deals with organizing these in qunk. + + $qunk{'revision'} = $revision; + + # Grab the branch, even though we may or may not need it: + $qunk{'revision'} =~ /([\d.]+)\d+/; + my $branch_prefix = $1; + $branch_prefix =~ s/\.$//; # strip off final dot + if ($branch_names{$branch_prefix}) { + $qunk{'branch'} = $branch_names{$branch_prefix}; + } + + # If there's anything in the @branch_roots array, then this + # revision is the root of at least one branch. We'll display + # them as branch names instead of revision numbers, the + # substitution for which is done directly in the array: + if (@branch_roots) { + my @roots = map { $branch_names{$_} } @branch_roots; + $qunk{'branchroots'} = \@roots; + } + + # Save tags too. + if (defined ($symbolic_names{$revision})) { + $qunk{'tags'} = $symbolic_names{$revision}; + delete $symbolic_names{$revision}; + } + + # Add this file to the list + # (We use many spoonfuls of autovivication magic. Hashes and arrays + # will spring into existence if they aren't there already.) + + &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n"); + + # Store with the files in this commit. Later we'll loop through + # again, making sure that revisions with the same log message + # and nearby commit times are grouped together as one commit. + push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk); + } + + CLEAR: + # Make way for the next message + undef $msg_txt; + undef $time; + undef $revision; + undef $author; + undef @branch_roots; + + # Maybe even make way for the next file: + if ($detected_file_separator) { + undef $file_full_path; + undef %branch_names; + } + } + + close (LOG_SOURCE); + + ### Process each ChangeLog + + while (my ($dir,$authorhash) = each %grand_poobah) + { + &debug ("DOING DIR: $dir\n"); + + # Here we twist our hash around, from being + # author => time => message => filelist + # in %$authorhash to + # time => author => message => filelist + # in %changelog. + # + # This is also where we merge entries. The algorithm proceeds + # through the timeline of the changelog with a sliding window of + # $Max_Checkin_Duration seconds; within that window, entries that + # have the same log message are merged. + # + # (To save space, we zap %$authorhash after we've copied + # everything out of it.) + + my %changelog; + while (my ($author,$timehash) = each %$authorhash) + { + my $lasttime; + my %stamptime; + foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash)) + { + my $msghash = $timehash->{$time}; + while (my ($msg,$qunklist) = each %$msghash) + { + my $stamptime = $stamptime{$msg}; + if ((defined $stamptime) + and (($time - $stamptime) < $Max_Checkin_Duration) + and (defined $changelog{$stamptime}{$author}{$msg})) + { + push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist); + } + else { + $changelog{$time}{$author}{$msg} = $qunklist; + $stamptime{$msg} = $time; + } + } + } + } + undef (%$authorhash); + + ### Now we can write out the ChangeLog! + + my ($logfile_here, $logfile_bak, $tmpfile); + + if (! $Output_To_Stdout) { + $logfile_here = $dir . $Log_File_Name; + $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem + $tmpfile = "${logfile_here}.cvs2cl$$.tmp"; + $logfile_bak = "${logfile_here}.bak"; + + open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\""; + } + else { + open (LOG_OUT, ">-") or die "Unable to open stdout for writing"; + } + + print LOG_OUT $ChangeLog_Header; + + if ($XML_Output) { + print LOG_OUT "\n\n\n\n"; + } + + foreach my $time (sort {$main::b <=> $main::a} (keys %changelog)) + { + my $authorhash = $changelog{$time}; + while (my ($author,$mesghash) = each %$authorhash) + { + while (my ($msg,$qunklist) = each %$mesghash) + { + my $files = &pretty_file_list ($qunklist); + my $logtext = &pretty_msg_text ($msg); + my $header_line; # date and author + my $body; # see below + my $wholething; # $header_line + $body + + $body = $files . (($XML_Output) ? "" : $After_Header) . $logtext; + + # kff todo: do some more XML munging here, on the header + # part of the entry: + + my ($ignore,$min,$hour,$mday,$mon,$year,$wday) + = $UTC_Times ? gmtime($time) : localtime($time); + + # XML output includes everything else, we might as well make + # it always include Day Of Week too, for consistency. + if ($Show_Day_Of_Week or $XML_Output) { + $wday = ("Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday")[$wday]; + $wday = ($XML_Output) ? "${wday}\n" : " $wday"; + } + else { + $wday = ""; + } + + if ($XML_Output) { + $author = &xml_escape ($author); + $header_line = + sprintf ("%4u-%02u-%02u\n" + . "${wday}" + . "\n" + . "%s\n", + $year+1900, $mon+1, $mday, $hour, $min, $author); + } + else { + $header_line = + sprintf ("%4u-%02u-%02u${wday} %02u:%02u %s\n\n", + $year+1900, $mon+1, $mday, $hour, $min, $author); + } + + unless ($XML_Output) { + $body = wrap ("\t", " ", "$body"); + } + + $wholething = $header_line . $body; + + if ($XML_Output) { + $wholething = "\n${wholething}\n"; + } + + # One last check: make sure it passes the regexp test, if the + # user asked for that. We have to do it here, so that the + # test can match against information in the header as well + # as in the text of the log message. + + # How annoying to duplicate so much code just because I + # can't figure out a way to evaluate scalars on the trailing + # operator portion of a regular expression. Grrr. + if ($Case_Insensitive) { + unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) { + print LOG_OUT "${wholething}\n"; + } + } + else { + unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) { + print LOG_OUT "${wholething}\n"; + } + } + } + } + } + + if ($XML_Output) { + print LOG_OUT "\n"; + } + + close (LOG_OUT); + + if (! $Output_To_Stdout) + { + if (-f $logfile_here) { + rename ($logfile_here, $logfile_bak); + } + rename ($tmpfile, $logfile_here); + } + } +} + + +sub parse_date_and_author () +{ + # Parses the date/time and author out of a line like: + # + # date: 1999/02/19 23:29:05; author: apharris; state: Exp; + + my $line = shift; + + my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~ + m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);# + or die "Couldn't parse date ``$line''"; + die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); + # Kinda arbitrary, but useful as a sanity check + my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); + + return ($time, $author); +} + + +# Here we take a bunch of qunks and convert them into printed +# summary that will include all the information the user asked for. +sub pretty_file_list () +{ + my $qunksref = shift; + my @qunkrefs = @$qunksref; + my @filenames; + my $beauty = ""; # The accumulating header string for this entry. + my %non_unanimous_tags; # Tags found in a proper subset of qunks + my %unanimous_tags; # Tags found in all qunks + my %all_branches; # Branches found in any qunk + my $common_dir; # Dir of all files, or "" if no common dir + my $fbegun = 0; # Did we begin printing filenames yet? + + # First, loop over the qunks gathering all the tag/branch names. + # We'll put them all in non_unanimous_tags, and take out the + # unanimous ones later. + foreach my $qunkref (@qunkrefs) + { + # Keep track of whether all the files in this commit were in the + # same directory, and memorize it if so. We can make the output a + # little more compact by mentioning the directory only once. + if ((scalar (@qunkrefs)) > 1) + { + if (! (defined ($common_dir))) { + my ($base, $dir); + ($base, $dir, undef) = fileparse ($$qunkref{'filename'}); + ($dir eq "./") ? ($common_dir = "") : ($common_dir = $dir); + } + elsif ($common_dir) { + $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir); + } + } + else # only one file in this entry anyway, so common dir not an issue + { + $common_dir = ""; + } + + if (defined ($$qunkref{'branch'})) { + $all_branches{$$qunkref{'branch'}} = 1; + } + if (defined ($$qunkref{'tags'})) { + foreach my $tag (@{$$qunkref{'tags'}}) { + $non_unanimous_tags{$tag} = 1; + } + } + } + + # Any tag held by all qunks will be printed specially... but only if + # there are multiple qunks in the first place! + if ((scalar (@qunkrefs)) > 1) { + foreach my $tag (keys (%non_unanimous_tags)) { + my $everyone_has_this_tag = 1; + foreach my $qunkref (@qunkrefs) { + if ((! (defined ($$qunkref{'tags'}))) + or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) { + $everyone_has_this_tag = 0; + } + } + if ($everyone_has_this_tag) { + $unanimous_tags{$tag} = 1; + delete $non_unanimous_tags{$tag}; + } + } + } + + if ($XML_Output) + { + # If outputting XML, then our task is pretty simple, because we + # don't have to detect common dir, common tags, branch prefixing, + # etc. We just output exactly what we have, and don't worry about + # redundancy or readability. + + foreach my $qunkref (@qunkrefs) + { + my $filename = $$qunkref{'filename'}; + my $revision = $$qunkref{'revision'}; + my $tags = $$qunkref{'tags'}; + my $branch = $$qunkref{'branch'}; + my $branchroots = $$qunkref{'branchroots'}; + + $filename = &xml_escape ($filename); # probably paranoia + $revision = &xml_escape ($revision); # definitely paranoia + + $beauty .= "\n"; + $beauty .= "${filename}\n"; + $beauty .= "${revision}\n"; + if ($branch) { + $branch = &xml_escape ($branch); # more paranoia + $beauty .= "${branch}\n"; + } + foreach my $tag (@$tags) { + $tag = &xml_escape ($tag); # by now you're used to the paranoia + $beauty .= "${tag}\n"; + } + foreach my $root (@$branchroots) { + $root = &xml_escape ($root); # which is good, because it will continue + $beauty .= "${root}\n"; + } + $beauty .= "\n"; + } + + # Theoretically, we could go home now. But as long as we're here, + # let's print out the common_dir and utags, as a convenience to + # the receiver (after all, earlier code calculated that stuff + # anyway, so we might as well take advantage of it). + + if ((scalar (keys (%unanimous_tags))) > 1) { + foreach my $utag ((keys (%unanimous_tags))) { + $utag = &xml_escape ($utag); # the usual paranoia + $beauty .= "${utag}\n"; + } + } + if ($common_dir) { + $common_dir = &xml_escape ($common_dir); + $beauty .= "${common_dir}\n"; + } + + # That's enough for XML, time to go home: + return $beauty; + } + + # Else not XML output, so complexly compactify for chordate + # consumption. At this point we have enough global information + # about all the qunks to organize them non-redundantly for output. + + if ($common_dir) { + # Note that $common_dir still has its trailing slash + $beauty .= "$common_dir: "; + } + + if ($Show_Branches) + { + # For trailing revision numbers. + my @brevisions; + + foreach my $branch (keys (%all_branches)) + { + foreach my $qunkref (@qunkrefs) + { + if ((defined ($$qunkref{'branch'})) + and ($$qunkref{'branch'} eq $branch)) + { + if ($fbegun) { + # kff todo: comma-delimited in XML too? Sure. + $beauty .= ", "; + } + else { + $fbegun = 1; + } + my $fname = substr ($$qunkref{'filename'}, length ($common_dir)); + $beauty .= $fname; + $$qunkref{'printed'} = 1; # Just setting a mark bit, basically + + if ($Show_Tags && (defined @{$$qunkref{'tags'}})) { + my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); + if (@tags) { + $beauty .= " (tags: "; + $beauty .= join (', ', @tags); + $beauty .= ")"; + } + } + + if ($Show_Revisions) { + # Collect the revision numbers' last components, but don't + # print them -- they'll get printed with the branch name + # later. + $$qunkref{'revision'} =~ /.+\.([\d])+$/; + push (@brevisions, $1); + + # todo: we're still collecting branch roots, but we're not + # showing them anywhere. If we do show them, it would be + # nifty to just call them revision "0" on a the branch. + # Yeah, that's the ticket. + } + } + } + $beauty .= " ($branch"; + if (@brevisions) { + if ((scalar (@brevisions)) > 1) { + $beauty .= ".["; + $beauty .= (join (',', @brevisions)); + $beauty .= "]"; + } + else { + $beauty .= ".$brevisions[0]"; + } + } + $beauty .= ")"; + } + } + + # Okay; any qunks that were done according to branch are taken care + # of, and marked as printed. Now print everyone else. + + foreach my $qunkref (@qunkrefs) + { + next if (defined ($$qunkref{'printed'})); # skip if already printed + + if ($fbegun) { + $beauty .= ", "; + } + else { + $fbegun = 1; + } + $beauty .= substr ($$qunkref{'filename'}, length ($common_dir)); + $$qunkref{'printed'} = 1; # Set a mark bit. + + if ($Show_Revisions || $Show_Tags) + { + my $started_addendum = 0; + + if ($Show_Revisions) { + $started_addendum = 1; + $beauty .= " ("; + $beauty .= "$$qunkref{'revision'}"; + } + if ($Show_Tags && (defined $$qunkref{'tags'})) { + my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); + if ((scalar (@tags)) > 0) { + if ($started_addendum) { + $beauty .= ", "; + } + else { + $beauty .= " (tags: "; + } + $beauty .= join (', ', @tags); + $started_addendum = 1; + } + } + if ($started_addendum) { + $beauty .= ")"; + } + } + } + + # Unanimous tags always come last. + if ($Show_Tags && %unanimous_tags) + { + $beauty .= " (utags: "; + $beauty .= join (', ', keys (%unanimous_tags)); + $beauty .= ")"; + } + + # todo: still have to take care of branch_roots? + + $beauty = "* $beauty:"; + + return $beauty; +} + + +sub common_path_prefix () +{ + my $path1 = shift; + my $path2 = shift; + + my ($dir1, $dir2); + (undef, $dir1, undef) = fileparse ($path1); + (undef, $dir2, undef) = fileparse ($path2); + + # Transmogrify Windows filenames to look like Unix. + # (It is far more likely that someone is running cvs2cl.pl under + # Windows than that they would genuinely have backslashes in their + # filenames.) + $dir1 =~ tr#\\#/#; + $dir2 =~ tr#\\#/#; + + my $accum1 = ""; + my $accum2 = ""; + my $last_common_prefix = ""; + + while ($accum1 eq $accum2) + { + $last_common_prefix = $accum1; + last if ($accum1 eq $dir1); + my ($tmp1) = split (/\//, (substr ($dir1, length ($accum1)))); + my ($tmp2) = split (/\//, (substr ($dir2, length ($accum2)))); + $accum1 .= "$tmp1/" if ((defined ($tmp1)) and $tmp1); + $accum2 .= "$tmp2/" if ((defined ($tmp2)) and $tmp2); + } + + return $last_common_prefix; +} + + +sub pretty_msg_text () +{ + my $text = shift; + + # Strip out carriage returns (as they probably result from DOSsy editors). + $text =~ s/\r\n/\n/g; + + # If it *looks* like two newlines, make it *be* two newlines: + $text =~ s/\n\s*\n/\n\n/g; + + # Strip off lone newlines, but only for lines that don't begin with + # whitespace or a mail-quoting character, since we want to preserve + # that kind of formatting. Also don't strip newlines that follow a + # period; we handle those specially next. + 1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g); + + # If a newline follows a period, make sure that when we bring up the + # bottom sentence, it begins with two spaces. + 1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g); + + if ($XML_Output) + { + $text = &xml_escape ($text); + $text = "${text}\n"; + } + + return $text; +} + + +sub xml_escape () +{ + my $txt = shift; + $txt =~ s/&/&/g; + $txt =~ s//>/g; + return $txt; +} + + +sub maybe_read_user_map_file () +{ + my %expansions; + + if ($User_Map_File) + { + open (MAPFILE, "<$User_Map_File") + or die ("Unable to open $User_Map_File ($!)"); + + while () + { + my ($username, $expansion) = split ':'; + chomp $expansion; + $expansion =~ s/^'(.*)'$/$1/; + $expansion =~ s/^"(.*)"$/$1/; + + # If it looks like the expansion has a real name already, then + # we toss the username we got from CVS log. Otherwise, keep + # it to use in combination with the email address. + + if ($expansion =~ /^\s*<{0,1}\S+@.*/) { + # Also, add angle brackets if none present + if (! ($expansion =~ /<\S+@\S+>/)) { + $expansions{$username} = "$username <$expansion>"; + } + else { + $expansions{$username} = "$username $expansion"; + } + } + else { + $expansions{$username} = $expansion; + } + } + + close (MAPFILE); + } + + return %expansions; +} + + +sub parse_options () +{ + # Check this internally before setting the global variable. + my $output_file; + + # If this gets set, we encountered unknown options and will exit at + # the end of this subroutine. + my $exit_with_admonishment = 0; + + while (my $arg = shift (@ARGV)) + { + if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) { + $Print_Usage = 1; + } + elsif ($arg =~ /^--debug$/) { # unadvertised option, heh + $Debug = 1; + } + elsif ($arg =~ /^--version$/) { + $Print_Version = 1; + } + elsif ($arg =~ /^-g$|^--global-opts$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + # Don't assume CVS is called "cvs" on the user's system: + $Log_Source_Command =~ s/(^\S*)/$1 $narg/; + } + elsif ($arg =~ /^-l$|^--log-opts$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $Log_Source_Command .= " $narg"; + } + elsif ($arg =~ /^-f$|^--file$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $output_file = $narg; + } + elsif ($arg =~ /^-U$|^--usermap$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $User_Map_File = $narg; + } + elsif ($arg =~ /^-W$|^--window$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $Max_Checkin_Duration = $narg; + } + elsif ($arg =~ /^-I$|^--ignore$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + push (@Ignore_Files, $narg); + } + elsif ($arg =~ /^-C$|^--case-insensitive$/) { + $Case_Insensitive = 1; + } + elsif ($arg =~ /^-R$|^--regexp$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $Regexp_Gate = $narg; + } + elsif ($arg =~ /^--stdout$/) { + $Output_To_Stdout = 1; + } + elsif ($arg =~ /^--version$/) { + $Print_Version = 1; + } + elsif ($arg =~ /^-d$|^--distributed$/) { + $Distributed = 1; + } + elsif ($arg =~ /^-P$|^--prune$/) { + $Prune_Empty_Msgs = 1; + } + elsif ($arg =~ /^-S$|^--separate-header$/) { + $After_Header = "\n\n"; + } + elsif ($arg =~ /^--gmt$|^--utc$/) { + $UTC_Times = 1; + } + elsif ($arg =~ /^-w$|^--day-of-week$/) { + $Show_Day_Of_Week = 1; + } + elsif ($arg =~ /^-r$|^--revisions$/) { + $Show_Revisions = 1; + } + elsif ($arg =~ /^-t$|^--tags$/) { + $Show_Tags = 1; + } + elsif ($arg =~ /^-b$|^--branches$/) { + $Show_Branches = 1; + } + elsif ($arg =~ /^-F$|^--follow$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + push (@Follow_Branches, $narg); + } + elsif ($arg =~ /^--stdin$/) { + $Input_From_Stdin = 1; + } + elsif ($arg =~ /^--header$/) { + my $narg = shift (@ARGV) || die "$arg needs argument.\n"; + $ChangeLog_Header = &slurp_file ($narg); + if (! defined ($ChangeLog_Header)) { + $ChangeLog_Header = ""; + } + } + elsif ($arg =~ /^--xml$/) { + $XML_Output = 1; + } + else { + # Just add a filename as argument to the log command + $Log_Source_Command .= " $arg"; + } + } + + ## Check for contradictions... + + if ($Output_To_Stdout && $Distributed) { + print STDERR "cannot pass both --stdout and --distributed\n"; + $exit_with_admonishment = 1; + } + + if ($Output_To_Stdout && $output_file) { + print STDERR "cannot pass both --stdout and --file\n"; + $exit_with_admonishment = 1; + } + + # Or if any other error message has already been printed out, we + # just leave now: + if ($exit_with_admonishment) { + &usage (); + exit (1); + } + elsif ($Print_Usage) { + &usage (); + exit (0); + } + elsif ($Print_Version) { + &version (); + exit (0); + } + + ## Else no problems, so proceed. + + if ($Output_To_Stdout) { + undef $Log_File_Name; # not actually necessary + } + elsif ($output_file) { + $Log_File_Name = $output_file; + } +} + + +sub slurp_file () +{ + my $filename = shift || die ("no filename passed to slurp_file()"); + my $retstr; + + open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)"); + my $saved_sep = $/; + undef $/; + $retstr = ; + $/ = $saved_sep; + close (SLURPEE); + return $retstr; +} + + +sub debug () +{ + if ($Debug) { + my $msg = shift; + print STDERR $msg; + } +} + + +sub version () +{ + print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n"; +} + + +sub usage () +{ + &version (); + print <<'END_OF_INFO'; +Generate GNU-style ChangeLogs in CVS working copies. + +Notes about the output format(s): + + The default output of cvs2cl.pl is designed to be compact, formally + unambiguous, but still easy for humans to read. It is largely + self-explanatory, I hope; the one abbreviation that might not be + obvious is "utags". That stands for "universal tags" -- a + universal tag is one held by all the files in a given change entry. + + If you need output that's easy for a program to parse, use the + --xml option. Note that with XML output, just about all available + information is included with each change entry, whether you asked + for it or not, on the theory that your parser can ignore anything + it's not looking for. + +Notes about the options and arguments (the actual options are listed +last in this usage message): + + * The -I and -F options may appear multiple times. + + * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works). + This is okay because no would ever, ever be crazy enough to name a + branch "trunk", right? Right. + + * For the -U option, the UFILE should be formatted like + CVSROOT/users. That is, each line of UFILE looks like this + jrandom:jrandom@red-bean.com + or maybe even like this + jrandom:'Jesse Q. Random ' + Don't forget to quote the portion after the colon if necessary. + + * Many people want to filter by date. To do so, invoke cvs2cl.pl + like this: + cvs2cl.pl -l "-d'DATESPEC'" + where DATESPEC is any date specification valid for "cvs log -d". + (Note that CVS 1.10.7 and below requires there be no space between + -d and its argument). + +Options/Arguments: + + -h, -help, --help, or -? Show this usage and exit + --version Show version and exit + -r, --revisions Show revision numbers in output + -b, --branches Show branch names in revisions when possible + -t, --tags Show tags (symbolic names) in output + --stdin Read from stdin, don't run cvs log + --stdout Output to stdout not to ChangeLog + -d, --distributed Put ChangeLogs in subdirs + -f FILE, --file FILE Write to FILE instead of "ChangeLog" + -W SECS, --window SECS Window of time within which log entries unify + -U UFILE, --usermap UFILE Expand usernames to email addresses from UFILE + -R REGEXP, --regexp REGEXP Include only entries that match REGEXP + -I REGEXP, --ignore REGEXP Ignore files whose names match REGEXP + -C, --case-insensitive Any regexp matching is done case-insensitively + -F BRANCH, --follow BRANCH Show only revisions on or ancestral to BRANCH + -S, --separate-header Blank line between each header and log message + --gmt, --utc Show times in GMT/UTC instead of local time + -w, --day-of-week Show day of week + --header FILE Get ChangeLog header from FILE ("-" means stdin) + --xml Output XML instead of ChangeLog format + -P, --prune Don't show empty log messages + -g OPTS, --global-opts OPTS Invoke like this "cvs OPTS log ..." + -l OPTS, --log-opts OPTS Invoke like this "cvs ... log OPTS" + FILE1 [FILE2 ...] Show only log information for the named FILE(s) + +See http://www.red-bean.com/~kfogel/cvs2cl.shtml for maintenance and bug info. +END_OF_INFO +} + +__END__ + +=head1 NAME + +cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by + running "cvs log" and parsing the output. Shared log entries are + unified in an intuitive way. + +=head1 DESCRIPTION + +This script generates GNU-style ChangeLog files from CVS log +information. Basic usage: just run it inside a working copy and a +ChangeLog will appear. It requires repository access (i.e., 'cvs log' +must work). Run "cvs2cl.pl --help" to see more advanced options. + +See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and +for instructions on getting anonymous CVS access to this script. + +Maintainer: Karl Fogel +Please report bugs to . + +=head1 README + +This script generates GNU-style ChangeLog files from CVS log +information. Basic usage: just run it inside a working copy and a +ChangeLog will appear. It requires repository access (i.e., 'cvs log' +must work). Run "cvs2cl.pl --help" to see more advanced options. + +See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and +for instructions on getting anonymous CVS access to this script. + +Maintainer: Karl Fogel +Please report bugs to . + +=head1 PREREQUISITES + +This script requires C, C, and +C. +It also seems to require C or higher. + +=pod OSNAMES + +any + +=pod SCRIPT CATEGORIES + +Version_Control/CVS + +=cut + + +-*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- + +Note about a bug-slash-opportunity: +----------------------------------- + +There's a bug in Text::Wrap, which affects cvs2cl. This script +reveals it: + + #!/usr/bin/perl -w + + use Text::Wrap; + + my $test_text = + "This script demonstrates a bug in Text::Wrap. The very long line + following this paragraph will be relocated relative to the surrounding + text: + + ==================================================================== + + See? When the bug happens, we'll get the line of equal signs below + this paragraph, even though it should be above."; + + + # Print out the test text with no wrapping: + print "$test_text"; + print "\n"; + print "\n"; + + # Now print it out wrapped, and see the bug: + print wrap ("\t", " ", "$test_text"); + print "\n"; + print "\n"; + +If the line of equal signs were one shorter, then the bug doesn't +happen. Interesting. + +Anyway, rather than fix this in Text::Wrap, we might as well write a +new wrap() which has the following much-needed features: + +* initial indentation, like current Text::Wrap() +* subsequent line indentation, like current Text::Wrap() +* user chooses among: force-break long words, leave them alone, or die()? +* preserve existing indentation: chopped chunks from an indented line + are indented by same (like this line, not counting the asterisk!) +* optional list of things to preserve on line starts, default ">" + +Note that the last two are essentially the same concept, so unify in +implementation and give a good interface to controlling them. + +And how about: + +Optionally, when encounter a line pre-indented by same as previous +line, then strip the newline and refill, but indent by the same. +Yeah... diff --git a/tools/cvs2cl/mywrap.pl b/tools/cvs2cl/mywrap.pl new file mode 100755 index 0000000..aeaa2c0 --- /dev/null +++ b/tools/cvs2cl/mywrap.pl @@ -0,0 +1,84 @@ +#!/usr/bin/perl -w + +use Text::Wrap; + +my $test_text = +"Change from Melissa O'Neill : + +Removed some superfluous tests from conditionals (it's true that they +make it clear what is true at that point, but that could be expressed +in a comment, rather than in executed code). (Unfortunately, this +meant that I outdented a fairly large chunk of code, making the diff +look like a more extensive change than it really is.) + + This paragraph has been entirely indented. It consists of six + lines, each of which is indented by no less than three spaces. + This paragraph has been entirely indented. It consists of six + lines, each of which is indented by no less than three spaces. + This paragraph has been entirely indented. It consists of six + lines, each of which is indented by no less than three spaces. + +This script demonstrates a bug in Text::Wrap. The very long line of +equal signs that ought to come right after this paragraph will be +wrongly relocated: + +==================================================================== + +See? When the bug happens, we'll get the line of equal signs below, +even though it should be above."; + + +### My reimplementation of Text::Wrap(). + + +sub my_wrap () +{ + my $initial_indent = shift; + my $subsequent_indent = shift; + my $text = shift; + + + $text =~ s/\n/ /g; + + + +} + + + + + +# Print out the test text with no wrapping: +print "$test_text"; +print "\n"; +print "\n"; + +# Now print it out wrapped, and see the bug: +print wrap ("\t", " ", "$test_text"); +print "\n"; +print "\n"; + + +__END__ + +If the line of equal signs were one shorter, then the bug doesn't +happen. Interesting. + +Anyway, rather than fix this in Text::Wrap, we might as well write a +new wrap() which has the following much-needed features: + +* initial indentation, like current Text::Wrap() +* subsequent line indentation, like current Text::Wrap() +* user chooses among: force-break long words, leave them alone, or die()? +* preserve existing indentation: chopped chunks from an indented line + are indented by same (like this line, not counting the asterisk!) +* optional list of things to preserve on line starts, default ">" + +Note that the last two are essentially the same concept, so unify in +implementation and give a good interface to controlling them. + +And how about: + +Optionally, when encounter a line pre-indented by same as previous +line, then strip the newline and refill, but indent by the same. +Yeah...