--- wwgetall-3.21/wwgetall.pl Thu Oct 29 06:40:13 1998
+++ wwgetall-new/wwgetall.pl Fri Mar 05 21:31:09 1999
@@ -653,22 +653,23 @@
$lostline ="";
}
$line =~ s/\s+/ /g;
- $line =~ s/<\s+//>/g;
$line =~ s/\s*=\s*/=/g;
$url = '';
#print "line=[$line],lostline=[$lostline]\n";
# print "line = [$line]\n";
- if($line =~ m/]*\s)?href\=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #"
- $url = $3 || $4;
+ if($line =~ m/]*[ "'])?href=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #"
+ $url = $4 || $5;
# print "matched url=[$url]\n";
- }elsif($line =~ m/]*\s)?src=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #"
- $url = $3 || $4;
+ }elsif($line =~ m/]*[ "'])?src=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #"
+ $url = $4 || $5;
# print "IMG match=[$url]\n";
- }elsif($line =~ m/]*\s)?src=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #"
- $url = $3 || $4;
- }elsif($line =~ m/]*\s)?background=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #"
- $url = $3 || $4;
+ }elsif($line =~ m/]*[ "'])?src=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #"
+ $url = $4 || $5;
+ }elsif($line =~ m/]*[ "'])?background=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #"
+ $url = $4 || $5;
# print "Backgournd match = [$url]\n"
}
$url =~ s|\#.*$||; # erase label
--- wwgetall-3.21/abs2rel.pl Thu Oct 29 06:40:15 1998
+++ wwgetall-new/abs2rel.pl Thu Mar 04 18:15:35 1999
@@ -40,15 +40,15 @@
sub abs2rel_url
{
local($currenturl,$url) = @_;
-# local($urlhead,$urlbody);
+ local($urlhead,$urlbody);
local($currenturl0,$url0) = ($currenturl,$url);
local($host_root_dir,$host_dir);
$currenturl =~ s|[^/]*$||;
if($currenturl =~ m|^(http://[^/]+)(.*)|){
-# $urlhead = $1;
-# $urlbody = $2;
+ $urlhead = $1;
+ $urlbody = $2;
$host_root_dir = ("../" x (($urlbody =~ tr|/|/|)-1));
$host_dir = "../" . $host_root_dir;
}
@@ -81,25 +81,20 @@
print "INFO: $file is too large. skipped \n";
return;
}
- link $file,"$file.bak" || die "can't link [$file] to [$file.bak]:$!";
- open(FH_R,"$file.bak") || die "ERROR: can't open read file[$file.bak]:$!";
+# No hard-link backup any more: slurp $file first, then (only if $backup) rename it to $file.bak before rewriting.
+ open(FH_R,"$file") || die "ERROR: can't open read file[$file]:$!";
$fromfile = join('',<FH_R>);
close(FH_R);
&abs2rel_str($currenturl,*fromfile);
- open(FH_W,">$file.new") || die "ERROR: can't open write file[$file.new]:$!";
+ if($backup){
+ rename($file, "$file.bak") || die "can't rename [$file] to [$file.bak]:$!";
+ }
+ open(FH_W,">$file") || die "ERROR: can't open write file[$file]:$!";
print FH_W $fromfile;
close(FH_W);
- print "cwd=[",`pwd`,"]\n";
- print "=========$file.new created=====\n";
+# print "cwd=[",`pwd`,"]\n";
+ print "=========$file modified=====\n";
# print "FROMFILE=($fromfile)\n";
- if(! $testmode){
- unlink $file;
- link "$file.new",$file || die "can't link [$file.new] to [$file]:$!";
- unlink "$file.new";
- }
- if(! $backup){
- unlink "$file.bak" || die "can't unlink [$file.bak]:$!";
- }
}
sub abs2rel_dir
{