#!/usr/bin/perl
# Mirror-fetch SRA data listed in an NCBI sra_result.csv table.
#
# Usage:  perl <this script> sra_result.csv      (or feed the CSV on STDIN)
#
# Environment:
#   debug=1   do not run anything; only print the command for each row
#   fork=1    add "-b" to wget (wget backgrounds itself) and pause 5 seconds
#             after every second download
#
# Each input row must either contain a column holding an "ftp:" URL, or start
# with an accession such as SRX151669, from which the URL is derived, e.g.
#   ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra/SRX/SRX151/SRX151669
#
# Notes carried over from earlier revisions:
#   - NCBI (or the local host) fails some transfers when too many are started
#     at once, hence the background option.
#   - 2014.11: NCBI ftp allowed only 2 simultaneous connections, hence the
#     sleep after every second download.
#   - Newer sra.csv exports lack the ftp: URL column, hence the accession
#     fallback.
#   - 'lftp -c mirror' was the previous downloader; wget is used now.

use strict;
use warnings;

use constant BASE_URL     => 'ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra';
use constant SNOOZE_SECS  => 5;    # pause length when throttling
use constant SNOOZE_EVERY => 2;    # throttle after this many downloads

# Split one CSV row on commas and strip one leading and one trailing double
# quote from every field.  This is a plain split: commas inside quoted fields
# are NOT protected.  A trailing LF or CRLF is removed first.
sub csv_fields {
    my ($line) = @_;
    $line =~ s/\r?\n\z//;
    return map { my $field = $_; $field =~ s/^"//; $field =~ s/"$//; $field }
           split /,/, $line;
}

# Return the download URL for the given row fields: the first field that
# contains "ftp:", otherwise a URL built from an accession in the first field
# (three capitals followed by at least six digits).  Returns nothing (undef in
# scalar context) when the row yields no URL, e.g. header or blank rows.
sub url_for_fields {
    my @fields = @_;

    my ($url) = grep { /ftp:/ } @fields;
    return $url if $url;

    my $accession = $fields[0];
    return unless defined $accession && $accession =~ /^[A-Z]{3}\d{6}/;

    # Layout: BASE/SRX/SRX151/SRX151669
    return join '/', BASE_URL, substr($accession, 0, 3),
                     substr($accession, 0, 6), $accession;
}

# Build the wget argument list for one URL.  Returned as a list so it can be
# handed to list-form system(), which bypasses the shell: the URL comes from
# the input file and must never be interpreted as shell syntax.
sub fetch_command {
    my ($url, $background) = @_;
    my @cmd = qw(wget -m -nv -np -nH --cut-dirs=7);
    push @cmd, '-b' if $background;    # wget -b forks itself to the background
    return (@cmd, $url);
}

# Read CSV rows from the files named in @ARGV (or STDIN) and fetch each URL.
sub main {
    my $debug      = $ENV{debug} || 0;
    my $background = $ENV{fork}  || 0;
    my $ndone      = 0;

    while (my $line = <>) {
        next if $line =~ /^#/;

        my $url = url_for_fields(csv_fields($line));
        next unless $url;    # nothing to fetch: do not log or count this row

        my @cmd    = fetch_command($url, $background);
        my $status = $debug ? 'debug' : system(@cmd);

        # $status is the raw value returned by system() (0 on success).
        warn "#fork=$status, @cmd\n";
        warn "# could not run $cmd[0]: $!\n" if !$debug && $status == -1;

        $ndone++;
        sleep SNOOZE_SECS if $background and $ndone % SNOOZE_EVERY == 0;
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;

1;