一个简单的多的socket http 下载原型 perl

一个简单的多的socket http 下载原型 perl

基于perl,使用IO::Select实现,并非多线程。可指定分几部分下载。
基本上没有作异常处理,没有处理redirect,甚至也没有判断对range头的响应是否为206.

还好的是它还可以工作,比wget快几倍地下载,挺好玩的.

perl module:

package HttpClient;

use strict;
use warnings;

use IO::Socket::INET;
use Data::Dumper;

my $crlf = “/r/n”;

my $buf_size = 8 * 1024;

sub new {

my $class = shift;
my %cnf = (@_);
my $self = {

state => ‘init’,
url => $cnf{url},
‘total_parts’ => $cnf{’total_parts’},
part => $cnf{part},
‘content_length’ => $cnf{’content_length’},
};
my $url = $self->{url};
my $host = $1 if $url =~ m{://([^/]*)};
my $file = $1 if $url =~ m{/([^/]*)$};
if ( defined $self->{part} ) {
$file .= “.part” . $self->{part};
}
$self->{host} = $host;
$self->{file} = $file;
my $port = 80;
$port = $1 if $host =~ /:(/d+)/;
my $sock = IO::Socket::INET->new(
PeerAddr => $host,
PeerPort => $port,
Proto => ‘tcp’,
Blocking => 0,
)
or die “can’t connect to server:$!/n”;
select($sock);
$| = 1;
select(STDOUT);
$self->{sock} = $sock;
bless $self, $class;
return $self;
}

sub sock {

return shift->{sock};
}

sub get_request_header {

my $self = shift;
return $self->{request} if defined $self->{request};
my $request =
“GET $self->{url} HTTP/1.1$crlf”
. “Host: $self->{host}$crlf”
. “Connection: close$crlf”;
if ( defined $self->{’total_parts’}
and defined $self->{part}
and defined $self->{’content_length’} )
{

my $length = $self->{’content_length’};
my $total_parts = $self->{’total_parts’};
my $part = $self->{part};
my $part_size = int( $length / $total_parts );
my $start_pos = $part_size * $part;
my $recved = 0;
if (-e $self->{file}) {

$recved = -s $self->{file};
$start_pos+=$recved;
}

my $recv_size =
( $part == $total_parts – 1 ) ? $length-$part*$part_size : $part_size;
$self->{start_pos} = $start_pos;
$self->{recv_size} = $recv_size-$recved;
print “part $self->{part} recv_size=$self->{recv_size},start_pos=$start_pos,recved=$recved,parts=$total_parts,length=$length/n”;
$request .=
“Range: bytes=$start_pos-” . ( $start_pos + $recv_size-1 ) . $crlf;
}
$request .= $crlf;
$self->{request} = $request;
return $request;
}

sub parse_header {

my ($self) = @_;
my $data = $self->{data};
return 1 if $self->{state} =~ /body/;
return 0 unless defined $data;
return 0 unless $data =~ m{^(.*?)(/r/n/r/n|/n/n)}s;
my $header_content = $1;
my $header_end = $2;
print $header_content, “/n”;
my @headers = split //r?/n/, $header_content;
die “invalid header/n” unless scalar(@headers) > 0;
my $status_line = shift @headers;
$self->{status_line} = $status_line;
$self->{code} = $2 if $status_line =~ m{HTTP/1(.1)? (/d+)};
my $last_header;
my $header = {};

foreach my $line (@headers) {

if ( $line =~ /^/s+(.*)$/ ) {

$header->{$last_header} .= ” $1″;
}
elsif ( $line =~ /^([^:]+): (.*)$/ ) {

$last_header = $1;
my $value = $2;
$header->{$last_header} = $value;
}
else {

print “invalid header:$line/n”;
}
}
$self->{header} = $header;
$self->{’content_length’} = $header->{’Content-Length’}
unless defined $self->{’content_length’};
$self->{recv_size} = $self->{’content_length’};
$self->{data} = substr($data,length($header_content)+length($header_end));
$self->{state} = ‘body’;
return 1;
}

sub recv_data {

my ( $self, $data ) = @_;
if ( defined $self->{data} ) {

$self->{data} .= $data;
}
else {

$self->{data} = $data;
}
}

sub save_data {

my ( $self, $read_select ) = @_;
my $fh = $self->{fh};
if ( !defined $fh ) {

open $fh, “>$self->{file}” or die “can’t open file $self->{file} :$!/n”;
binmode $fh,”:bytes”;
$self->{fh} = $fh;
}
my $write_len = $self->{write_len} || 0;
my $recv_size = $self->{recv_size};
my $data = $self->{data};
my $max_len = length($data);
return unless $max_len > 0;
if ($max_len+$write_len > $recv_size) {

$max_len = $recv_size – $write_len ;
my $part = $self->{part} || 0;
print “part=$part,max_len=$max_len,write_len=$write_len/n”;
}

if ( $max_len == 0 ) {

$self->{done} = 1;
close $self->{fh};
$read_select->remove( $self->sock );
close $self->{sock};
print “$self->{file} recved $write_len bytes/n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
my $len = syswrite( $fh, $data, $max_len )
or die “write data failed :$!/n”;
$self->{data} = substr( $data, $len );
$write_len += $len;
$self->{write_len} = $write_len;
}

sub child_done {

my ( $self, $child ) = @_;
$child->{done} = 1;
return unless $self->{done};
foreach my $c ( @{ $self->{children} } ) {

return unless $c->{done};
}

print “merge file/n”;
open FH, “>>$self->{file}”;
print “first part size:”,-s $self->{file},”/n”;
seek( FH, 0, 2 );
foreach my $c ( @{ $self->{children} } ) {

print “$c->{file} size:”,-s $c->{file},”/n”;
open CFH, “<$c->{file}”;

print “merge $c->{file}/n”;
my $buf;
for ( ; ; ) {

my $len = sysread( CFH, $buf, $buf_size );
last if !defined $len || $len == 0;
syswrite( FH, $buf, $len );
}
close CFH;
unlink $c->{file};
}
close FH;
}

sub handle_read {

my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $data;
my $len = sysread( $sock, $data, $buf_size );
if ( $len == 0 ) {

print “sock $sock finished/n”;
$read_select->remove($sock);
close $sock;
print “$self->{file} size=”,-s $self->{file},”/n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
$self->recv_data($data);
if ( $self->{state} !~ /body/ and $self->parse_header ) {

if ( !defined $self->{parent} and defined $self->{content_length} ) {

my $parts = $self->{total_parts} || 5;
$self->{children} = [];
my $length = $self->{’content_length’};
my $part_size = int( $length / $parts );
$self->{recv_size} = $part_size;
print “parent recv_size=$self->{recv_size}/n”;
foreach my $part ( 1 .. $parts – 1 ) {

my $child = HttpClient->new(
url => $self->{url},
‘total_parts’ => $parts,
part => $part,
‘content_length’ => $self->{content_length},
);
$sock_client->{ $child->sock } = $child;
$child->{parent} = $self;
push @{ $self->{children} }, $child;
$read_select->add( $child->sock );
$write_select->add( $child->sock );
}
}
}
else {

$self->save_data($read_select);
}
}

sub handle_write {

my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $offset = 0;
$offset = $self->{request_offset} if defined $self->{request_offset};
my $request = $self->get_request_header;
if ( $offset == 0 ) {

print “try to send request/n”;
print $request;
}
print “offset=$offset/n”;
my $len = syswrite( $sock, $request, length($request) – $offset, $offset );
if ( !defined $len ) {

print STDERR “write failed:$!/n”;
$read_select->remove($sock);
$write_select->remove($sock);
}
else {

$offset += $len;
$self->{request_offset} = $offset;
if ( $offset == length($request) ) {

$write_select->remove($sock);
}
}
}

sub start {

my ($self) = @_;
use IO::Select;
my $r = IO::Select->new;
$r->add( $self->sock );

my $w = IO::Select->new;
$w->add( $self->sock );

my $sock_client = { $self->sock => $self };

use Time::HiRes qw(time);
my $start_time = time;
for ( ; ; ) {

last if ( $r->count == 0 );
my ( $rout, $wout, $eout ) = IO::Select->select( $r, $w, $r );
last unless defined $rout;

foreach my $sock ( @{$wout} ) {

my $c = $sock_client->{$sock};
if ( !defined $c ) {

die “oops,can’t find httpclient for $sock/n”;
}
$c->handle_write( $sock, $r, $w, $sock_client );
}
foreach my $sock ( @{$rout} ) {

my $c = $sock_client->{$sock};
if ( !defined $c ) {

die “oops,can’t find httpclient for $sock/n”;
}
$c->handle_read( $sock, $r, $w, $sock_client );
}
}
my $end_time = time;
my $used_time = $end_time – $start_time;
my $speed = $self->{content_length} / $used_time;
print “Done,spend $used_time seconds,speed:$speed bytes/seconds/n”;
}

1;

test perl script:

#!/usr/bin/perl
use strict;
use warnings;

use lib ‘.’;

use HttpClient;
use Getopt::Long;
$| = 1;
#my $url = ‘http://eclipse.cdpa.nsysu.edu.tw/downloads/drops/R-3.2.1-200609210945/eclipse-SDK-3.2.1-linux-gtk.tar.gz’;

my $url = ”;
my $total_parts = 1;
my $result = GetOptions (”url|u=s” => /$url,
                        “parts|p=i”   => /$total_parts,
                        );
unless ($result and $url=~m{://}) {

    print <<HELP
usage: perl http.pl –url=url [–parts=parts]
HELP
;
    exit;
}

my $client = HttpClient->new(url=>$url,’total_parts’=>$total_parts);
$client->start();

参考:

RFC2616 – HTTP/1.1 Specification

technorati tags:perl, http, downloader, protocol

Blogged with Flock

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/100597.html原文链接:https://javaforall.cn

【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛

【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...

(0)


相关推荐

  • IDEA创建springboot + mybatis项目全过程「建议收藏」

    IDEA创建springboot + mybatis项目全过程「建议收藏」鉴于隔很久再在IDEA新建springboot项目时,会出现对步骤不确定的情况,因此,写下这篇博客记录创建一个可运行的springboot+mybatis项目的全过程。步骤如下:1.打开IDEA2.File==>new==>project,如图:3.选择springInitializr==>右边的ProjectSDK我选的是我已经安…

  • java用正则表达式大全_Java 正则表达式 大全「建议收藏」

    java用正则表达式大全_Java 正则表达式 大全「建议收藏」什么是正则表达式?正则表达式的定义:这是一种字符串的语法规则,是一种可以用于模式匹配和替换的规则,主要用于文本和字符串的处理(匹配,查找,替换,判断字符串)。一个正则表达式是由普通的字符(a~z)以及特殊字符(元字符)组成的文字模式,用以描述在查找文字主体时待匹配的一个或多个字符串。下面我们会介绍一些常用的元字符正则表达式的元字符元字符的定义:元字符:元字符使得正则表达式具有处理能力,指的是那些在…

  • ant的安装和使用

    ant的安装和使用1.ant的安装1.1添加环境变量:ANT_HOME=D:\software\ant\apache-ant-1.10.1在path中添加:%ANT_HOME%\bin1.2测试是否安装成功

  • 我的工程师进阶之路 – 2022更新[通俗易懂]

    我的工程师进阶之路 – 2022更新[通俗易懂]我补充了时间轴,从进入大学,不知不觉8年就这样匆匆而过,与其感叹我更想抓紧之后的时间,不知道接下来能经历些什么。

  • linux怎么查看root权限,linux 查看当前用户是否有root权限490

    linux怎么查看root权限,linux 查看当前用户是否有root权限490匿名用户1级2016-08-13回答ssh远程控制程序。sudo提权命令。你要不会用请不要尝试。配置sudo的配置文件在/etc/sudoers里面,一般通过visudo命令来安全打开编辑。查看/etc/sudoers的用法可以使用命令:man5sudoers。在/etc/sudoers文件里面主要包括这两部分:1.别名设置别名主要包括这几种:User_Alias,Host_Alia…

  • 排序算法汇总总结

    排序算法汇总总结

发表回复

您的电子邮箱地址不会被公开。

关注全栈程序员社区公众号