[重点]delphi 实现 根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、副题、作者和正文。

[重点]delphi 实现 根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、副题、作者和正文。项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。

大家好,又见面了,我是你们的朋友全栈君。

项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。


unit Unit1;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, IdBaseComponent,
  IdComponent, IdTCPConnection, IdTCPClient, IdHTTP, Vcl.ComCtrls;

type
  TForm1 = class(TForm)
    IdHTTP1: TIdHTTP;
    Button1: TButton;
    Label1: TLabel;
    Edit1: TEdit;
    ProgressBar1: TProgressBar;
    Memo1: TMemo;
    Button2: TButton;
    Memo2: TMemo;
    procedure Button1Click(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure Button2Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation
       uses StrUtils,HttpApp;
{$R *.dfm}

type
  TDelFlags = set of (dfDelBefore, dfDelAfter);


function Delstr(var ms: String; endstr: String; Flags: TDelFlags;
  bself: Boolean = True): String;
var
  l: Integer;
begin
  l := length(endstr);
  if dfDelBefore in Flags then
  begin
    if bself then
    begin
      Result := copy(ms, 1, pos(endstr, ms) + l - 1);
      Delete(ms, 1, pos(endstr, ms) + l - 1);
    end
    else
    begin
      Result := copy(ms, 1, pos(endstr, ms) - 1);
      Delete(ms, 1, pos(endstr, ms) - 1);
    end;
  end
  else
  begin
    if bself then
    begin
      Result := copy(ms, pos(endstr, ms), length(ms));
      Delete(ms, pos(endstr, ms), length(ms));
    end
    else
    begin
      Result := copy(ms, pos(endstr, ms) + l, length(ms));
      Delete(ms, pos(endstr, ms) + l, length(ms));
    end;
  end;
end;

procedure DelstrEx(var ms: String; endstr: String;
  var DelData: String; Flags: TDelFlags; bself: Boolean = True);
var
  l: Integer;
begin
  l := length(endstr);
  if dfDelBefore in Flags then
  begin           //删除字符串的前半部分
    if bself then //连同自己一起删除
    begin
      DelData := copy(ms, 1, pos(endstr, ms) + l - 1);
      Delete(ms, 1, pos(endstr, ms) + l - 1);
    end
    else
    begin
      DelData := copy(ms, pos(endstr, ms) - 1, length(ms));
      Delete(ms, 1, pos(endstr, ms) - 1);
    end;
  end
  else
  begin
    if bself then
    begin
      DelData := copy(ms, pos(endstr, ms), length(ms));
      Delete(ms, pos(endstr, ms), length(ms)); //连同自己一起删除
    end
    else
    begin
      DelData := copy(ms, pos(endstr, ms) + l, length(ms));
      Delete(ms, pos(endstr, ms) + l, length(ms));
    end;
  end;
end; {DelstrEx}


function GetCenterStr(src, str1, str2: String): String;
var
  i, i2, i3: Integer;
begin
  i := 0;
  i2 := 0;
  i3 := 0;
  Delstr(src, str1, [dfDelBefore]);
  i := pos(AnsiLowercase(str1), AnsiLowercase(src));
  i3 := pos(AnsiLowercase(str2), AnsiLowercase(src));
  Result := copy(src, i2 + 1, i3 - i2 - 1);
end;


function delstrByNum(ss:string;uniqueFlag:string;disapperNum:integer;FromFlags: TDelFlags;bReturnDeletedPart:boolean):string;
var _num:integer;
    _Str:string;
begin
     _num:=0;
     _Str:=ss;

     result:='';

     while _num<disapperNum do
     begin
         if dfDelBefore in FromFlags then   //从字符串左端开始删除
         begin
            delstr(_Str,uniqueFlag,FromFlags);
         end
         else
         begin  //从字符串右端开始删除
           _Str:= StrUtils.ReverseString(_Str) ;

           if bReturnDeletedPart then
              delstrEx(_Str,StrUtils.ReverseString(uniqueFlag),result,[dfdelbefore])
           else
              delstr(_Str,StrUtils.ReverseString(uniqueFlag),[dfdelbefore]);

             _Str:= StrUtils.ReverseString(_Str) ;
         end;

          inc(_num);
     end;

     if result='' then result:=_Str
     else  result:= StrUtils.ReverseString(result) ;
end;




function Matchstrings(Source, pattern: String): Boolean;
var
  pSource: array[0..255] of Char;
  pPattern: array[0..255] of Char;
  function MatchPattern(element, pattern: PChar): Boolean;
    function IsPatternWild(pattern: PChar): Boolean;
    begin
      Result := StrScan(pattern, '*') <> nil;
      if not Result then
        Result := StrScan(pattern, '?') <> nil;
    end;
  begin
    if 0 = StrComp(pattern, '*') then
      Result := True
    else if (element^ = Chr(0)) and (pattern^ <> Chr(0)) then
      Result := False
    else if element^ = Chr(0) then
      Result := True
    else
    begin
      case pattern^ of
        '*':
          if MatchPattern(element, @pattern[1]) then
            Result := True
          else
            Result := MatchPattern(@element[1], pattern);
          '?':
          Result := MatchPattern(@element[1], @pattern[1]);
        else
          if element^ = pattern^ then
            Result := MatchPattern(@element[1], @pattern[1])
          else
            Result := False;
      end;
    end;
  end;
begin
  StrPCopy(pSource, Source);
  StrPCopy(pPattern, pattern);
  Result := MatchPattern(pSource, pPattern);
end; {匹配字符串函数}


{从磁盘中搜索指定类型的所有文件}
procedure FindFiles(ASourceDir, SearchFileType: String; var List: TStrings);
var
  FileRec: TSearchrec;
  Sour, OldFileName, NewFileName: String;
  fs: TFileStream;
begin
  Sour := ASourceDir;
  if Sour[length(Sour)] <> '\' then
    Sour := Sour + '\';
  if FindFirst(Sour + '*.*', faAnyfile, FileRec) = 0 then
    {循环}
    repeat
      if ((FileRec.Attr and faDirectory) <> 0) then
      begin
        if (FileRec.Name <> '.') and (FileRec.Name <> '..') then //找到目录
        begin
          FindFiles(Sour + FileRec.Name, SearchFileType, List);
        end;
      end
      else //找到文件
      begin
        if Matchstrings(LowerCase(FileRec.Name), SearchFileType) then
        begin
          List.Add(Sour + FileRec.Name);
        end; {拷贝所有类型的文件}
      end;
    until FindNext(FileRec) <> 0;
  system.SysUtils.FindClose(FileRec);
end; {从磁盘中搜索指定类型的所有文件}



procedure RmHtmlTags(var src: string);
  function DelTag(var src: string): boolean;
  var
    iPosS, iPosE: integer;
  begin
    result := False;
    if pos('<script', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<script', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</script>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
      begin
        iPosS := pos('<', src);
        if iPosS > 0 then
          begin
            iPosE := pos('>', src);
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 1);
          end;
      end;
  end;
begin
  //src := LowerCase(src);
  src := src;
  repeat
  until not DelTag(src);
end;

procedure RmHtmlTagsEx(var src: string);
  function DelTag(var src: string): boolean;
  var
    iPosS, iPosE: integer;
  begin
    result := False;
    if pos('<script', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<script', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</script>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
    if pos('<style', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<style', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</style>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
      begin
       { iPosS := pos('<', src);
        if iPosS > 0 then
          begin
            iPosE := pos('>', src);
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 1);
          end; }
      end;
  end;
begin
  //src := LowerCase(src);
  src := src;
  repeat
  until not DelTag(src);
end;


function UrlDecoder(const AUrl:string):string;
begin
  result:= UTF8Decode(HttpDecode(AUrl));
end;

function UrlEncoder(const AUrl:string):string;
begin
//URL编码通常使用“+”来替换空格。
  result:=HttpEncode(UTF8Encode(AUrl));
end;


function  getResURL(http:TIdHttp;searchWord:string):string;
var info:tstringlist;
   res:tstringstream;
   tURL:string;
  MemoText: string;
begin
   http.HandleRedirects:=true;
   http.Request.UserAgent:='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.2)';
   http.Request.Host:='search.cyol.com';
   http.Request.ContentType:='application/x-www-form-urlencoded';
   http.Request.Referer:='http://search.cyol.com/index.htm';
   http.request.CacheControl:='no-cache';
   http.HTTPOptions:=http.HTTPOptions+[hoKeepOrigProtocol];

   try
      info:=tstringlist.Create;
     res:=tstringstream.Create('',TEncoding.UTF8);

    {
      info.Add('op=new');
     info.Add('searchBtn=搜索');
     info.Add('searchText='+searchWord); //全站内模糊搜索
     // info.Add('searchText=一日为师 终身挨骂?');
    }
     info.Add('ak=');
     info.Add('ck=');
     info.Add('df=');
     info.Add('dt=');
     info.Add('nk=4');
     info.Add('od=date');
     info.Add('op=adv');
     info.Add('tk='+searchWord);

     tURL:='http://search.cyol.com/searchh.jsp';
     http.Post(tURL,info,res);
     MemoText:= res.DataString;

     delstr(MemoText,'resultdiv',[dfdelbefore]);

     //showmessage(MemoText);

     if pos('color:red',ansilowercase(MemoText))=0 then
     begin
          result:='';
          Exit;
     end;


     delstr(MemoText,'>',[dfdelbefore]);
     delstr(MemoText,'<a',[dfdelbefore]);
     delstr(MemoText,'http:',[dfdelbefore],false);
     delstr(MemoText,'.htm',[dfdelafter],false);


     result:=MemoText;


   finally
      freeandnil(info);
      freeandnil(res);
      //http.Free;
   end;
end;

function getHtmlStr(http:TIdHttp;fURL:string):string;
begin
   if assigned(http) and (http is TIdHttp) and (http<>nil) then
    result:=  http.Get(fURL);
end;



procedure TForm1.Button1Click(Sender: TObject);

var htmlText:string;
  biaoti: string;
  Author: string;
  yinti: string;
  table_Pos: Integer;
  ss: string;
  outdata: string;
  neirong: string;
  zhenwen: string;
  frontPart: string;
  subtitle: string;
  txtList: TStrings;
  i: Integer;
  readtxt: TStringList;
  zhenti: string;
  resURL: string;

begin
   button1.Caption:='正在处理'; button1.Enabled:=false;

 { htmlText:=  getHtmlStr(idHTTP1, getResURL(idHTTP1,'一日为师 终身挨骂?') );

  frontPart:=htmlText;

  delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
  delstr(frontPart,'/enpproperty',[dfdelafter]);

  Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
  subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
  yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题


  //取正文
  zhenwen:=htmlText;
  delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
  delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
  Memo1.Text:=zhenwen;

  }

  if not directoryExists(edit1.Text) then
  begin

     showmessage('请输入标引txt的路径!');
    exit;
  end;


  txtList:=tstringlist.Create ;
  readtxt:=TStringlist.Create ;
  findfiles(edit1.Text,'*.txt',txtList);

  ProgressBar1.Position:=0;
  ProgressBar1.Max:=txtlist.Count;



  try

  for i := 0 to txtList.Count-1 do
  begin
       application.ProcessMessages ;
       ProgressBar1.Position:=i+1;

       readtxt.LoadFromFile(txtList[i]);

        zhenti:=readtxt.Values['<主题>'];

        htmlText:='';  zhenwen:='';
        author:='';subtitle:=''; yinti:='';


        resURL:=getResURL(idHTTP1,trim(zhenti));

        if ''<>trim(resURL) then
        begin

            htmlText:=  getHtmlStr(idHTTP1,  resURL);

            frontPart:=htmlText;

            delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
            delstr(frontPart,'/enpproperty',[dfdelafter]);

            Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
            subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
            yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题

            //取正文
            zhenwen:=htmlText;
            delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
            delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);

            RmHtmlTagsEx(zhenwen);

            if ''<>trim(yinti) then readtxt.Values['<引题>']:=yinti;
            if ''<>trim(subtitle) then readtxt.Values['<副题>']:=subtitle;
            if ''<>trim(author) then readtxt.Values['<作者>']:=author;
            if ''<>trim(zhenwen) then readtxt.Values['<正文>']:=slinebreak+trim(zhenwen);

            readtxt.SaveToFile(txtList[i]);

            readtxt.Clear ;
        end
        else
        begin
             Memo2.Lines.Add('未找到对应数据:'+txtList[i]);
        end;

  end; // for i end

  if ProgressBar1.Max=ProgressBar1.Position then
  begin
      showmessage('处理完成!');
  end;
  finally
     button1.Caption:='开始处理'; button1.Enabled:=true;
      freeandnil(readtxt);
      freeandnil(txtlist);
  end;










{  delstr(htmlText,'<body',[dfdelbefore]);
  biaoti:='biaoti';
  //取作者
  Author:=htmlText;
  delstr(Author,biaoti,[dfdelbefore]);
  delstr(Author,'rc-writer',[dfdelbefore]);
  delstr(Author,'>',[dfdelbefore]);
  delstr(Author,'<',[dfdelafter]);

  showmessage(Author);

  //取引题
  yinti:=htmlText;
  delstr(yinti,biaoti,[dfdelafter]);
  table_Pos:=0;
 //example:   ss:='<table>ccc</table><table>ddd</table>';
   yinti:=delstrByNum(yinti,'<table',1,[dfdelafter],true)+'>';
   RmHtmlTags(yinti);
   showmessage(yinti );

 //取正文内容
 neirong:='neirong';
 zhenwen:=htmlText;
 delstr(zhenwen,neirong,[dfdelbefore]);
 delstr(zhenwen,'<P',[dfdelbefore],false);
 delstr(zhenwen,'<script',[dfdelafter]);
 Memo1.Text:=zhenwen;
 }











end;

procedure TForm1.Button2Click(Sender: TObject);
var
  ss: string;
begin
   ss:=Memo1.Text;
   RmHtmlTagsEx(ss);
   memo1.Text:=ss;
end;

procedure TForm1.FormCreate(Sender: TObject);
begin
edit1.Clear ;
memo2.Clear ;
end;

end.


 

 

 

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/154654.html原文链接:https://javaforall.cn

【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛

【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...

(0)


相关推荐

  • vim常用命令详解(vim使用教程)

    目录本文解读来自我对manvim的解读vim介绍vim基本操作打开文件options详解vim的五种模式(是我自己定义的五种)正常模式必要命令详解末行模式常用命令详解可视模式常用命令详解本文解读来自我对manvim的解读当然有很多解读不准确的地方,所有红色标注的地方都是最基本的,也是保证正确的。(不排除我的语言描述有问题)vim介绍vim…

  • hashmap线程不安全问题_为什么HashMap线程不安全

    hashmap线程不安全问题_为什么HashMap线程不安全HashMap的线程不安全主要体现在下面两个方面:1.在JDK1.7中,当并发执行扩容操作时会造成环形链和数据丢失的情况。2.在JDK1.8中,在并发执行put操作时会发生数据覆盖的情况。JDK1.7在JDK1.7中,扩容数据时要进行把原数据迁移到新的位置,使用的方法://数据迁移的方法,头插法添加元素voidtransfer(Entry[]newTable,booleanrehash){intnewCapacity=newTable.length;     

    2022年10月11日
  • Android面试题2019[通俗易懂]

    https://juejin.im/post/5c8211fee51d453a136e36b0#heading-56这个写的相当不错

  • kl1083_显示器dpi是什么意思

    kl1083_显示器dpi是什么意思Windy 定义了一种 Windy 数:不含前导零且相邻两个数字之差至少为 2 的正整数被称为 Windy 数。Windy 想知道,在 A 和 B 之间,包括 A 和 B,总共有多少个 Windy 数?输入格式共一行,包含两个整数 A 和 B。输出格式输出一个整数,表示答案。数据范围1≤A≤B≤2×109输入样例1:1 10输出样例1:9输入样例2:25 50输出样例2:20#include<bits/stdc++.h>using namespace std;

  • 计算机组成与设计(六)—— 乘法器[通俗易懂]

    计算机组成与设计(六)—— 乘法器[通俗易懂]乘法的运算过程人们日常习惯的乘法是十进制,但计算机实现起来不方便。首先,需要记录9×9乘法表,每次相乘去表中找结果;其次,将竖式相加也不方便。但二进制却十分方便,冯·诺伊曼在《关于END

  • 2019年日历假期添加

    2019年日历假期添加

    2021年11月27日

发表回复

您的电子邮箱地址不会被公开。

关注全栈程序员社区公众号