早期上网经常需要使用代理服务器,现在用的比较少了,大家更耳熟能详的反而是“反向代理”如Nginx。
代理服务器一般用作局域网上网,而反向代理则是把来自互联网的连接转发到局域网上,作用刚好相反。
HTTP协议自身就带有对代理服务器的支持。HTTP协议目前有多个主要版本:0.9太简单,基本不见了;1.0只支持一个连接一个请求;1.1则支持长连接;2.0极大复杂化了传输过程,支持多路复用。协议版本这么多,但是代理服务器作为中间商,可以选择一个较低的版本,用户的客户端和服务器一般都有能力适应多个版本。
代理服务器可以选择比较简单的HTTP1.0版本,一个连接就是一个请求,只需要在连接建立之后做处理,处理完请求就是简单的数据转发了。
目录
前面说的“擦除”是把后面的数据前移而不是设置为空格,设置为空格并不符合HTTP协议,服务器一般不能理解。
原则上代理服务器可以支持客户端和服务器使用不同的协议版本,比如客户端用1.0而服务器用1.1,但这将极大地增加程序复杂度。
虽然HTTP的BODY与代理服务器处理无关,只需要接收完头部就可以处理,但是最好整个请求完整发送,因为有些服务器不能处理请求头和BODY分开的情形。
代理服务器通过Proxy-XXXX头标进行认证,这个认证是代理服务器的认证而不是用户要访问的服务器的认证。代理服务器认证完后就应该删除这些头标,因为这些头标对目标服务器毫无意义。
CONNECT是个不常用的请求方法,专门用于通过代理建立隧道。代理服务器取得目标服务器的地址和端口后直接连上去就可以了,然后就是双向转发数据。
下面的代码就是一个HTTP1.0代理的协议处理部分的代码,没有认证(因为用的是IP地址认证,在进入这个代码之前就已经处理过了):
- //servicethreadhttp.cpp
-
- #include "stdafx.h"
- #include "mystd.h"
- #include "Proxy.h"
- #include "httpresp.h"
-
- extern CProxyApp theApp;
-
- //HTTP协议处理线程
- DWORD ServiceThreadHttp(LPDWORD lpdwParam)
- {
- //--线程参数处理------------------------
- int cdindex;//连接数据索引
- struct ServiceData * servicedata;
-
- cdindex=((struct ThreadUserData *)lpdwParam)->index;
- servicedata=((struct ThreadUserData *)lpdwParam)->servicedata;
- //--------------------------------------
- struct ConnectionData * cd;
- struct LogStruct * logs;
-
- cd=&servicedata->connectiondataarray.pconnectiondata[cdindex];
- if(-1!=cd->log)
- {
- logs=servicedata->memlogfile.logstruct+cd->log;
- }
- else
- {
- logs=NULL;
- }
- //----------------------------------------
- struct ConfigInfo * pci;
- pci=&servicedata->serviceconfigfiledata.configarray[cd->serviceindex];
-
- int headlen;
- int port;
- char host[256];
- char uri[256];
- unsigned long addr;
- SOCKADDR_IN sa;
- BOOL isTunnel=FALSE;//是否是隧道请求
- char tunnelresponse[]="HTTP/1.0 200 Connection established\x0d\x0a"
- "Proxy-agent: FreeProxy 1.0\x0d\x0a\x0d\x0a";
-
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==cd->cmd)
- {
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //接收请求
- cd->sdc.bufcount=RecvHttpRequest(cd->sdc.s,cd->sdc.buf,BUFFERSIZE,
- &cd->cmd,&headlen,pci->islimitpost,1000*pci->maxpost);
- if(0>cd->sdc.bufcount)
- {
- //DebugMessage("RecvHttpRequest失败");
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //分析请求
- /* char tracertfile[256];
- if(-1!=mymemindex(cd->sdc.buf,cd->sdc.bufcount,"says=%2Fnick",strlen("says=%2Fnick")))
- {
- strcpy(tracertfile,"tracert_");
- itoa(cdindex,tracertfile+strlen(tracertfile),10);
- if(-1!=cd->log)WriteTracertFile(tracertfile,logs->username,strlen(logs->username));
- WriteTracertFile(tracertfile,cd->sdc.buf,cd->sdc.bufcount);
- }*/
- if(0>GetHttpURL(cd->sdc.buf,&cd->sdc.bufcount,headlen+4,host,256,&port,uri,256))
- {
- if(pci->isenableconnect && 0<=GetTunnelURL(cd->sdc.buf,&cd->sdc.bufcount,headlen+4,host,256,&port,uri,256))
- {//是隧道请求
- isTunnel=TRUE;
- if(-1!=cd->log)
- {
- strcpy(logs->domainname,host);
- }
- }
- else
- {
- send(cd->sdc.s,httpresp400,strlen(httpresp400),0);
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-2;
- }
- }
- else
- {
- if(-1!=cd->log)
- {
- strcpy(logs->domainname,host);
- }
- }
- ClearProxyInfo(cd->sdc.buf,&cd->sdc.bufcount);
-
- //检查目标许可
- if(IsForbidden(&theApp.bandata,host,uri))
- {
- send(cd->sdc.s,httpresp403,strlen(httpresp403),0);
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==cd->cmd)
- {
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //记录日志,计时开始
- if(-1!=cd->log)
- {
- time(&logs->timestart);
- }
-
- //域名解析
- if(1!=GetAddrByHost(addr,host))
- {
- send(cd->sdc.s,httpresp600,strlen(httpresp600),0);
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-3;
- }
-
- memcpy(&(sa.sin_addr.S_un.S_addr),&addr,4);
- sa.sin_family=AF_INET;
- sa.sin_port=htons((unsigned short)port);
-
- //建立SOCKET
- if(INVALID_SOCKET==(cd->sdr.s=socket(AF_INET,SOCK_STREAM,0)))
- {
- send(cd->sdc.s,httpresp601,strlen(httpresp601),0);
- closesocket(cd->sdc.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-4;
- }
-
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==cd->cmd)
- {
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //连接
- if(SOCKET_ERROR==connect(cd->sdr.s,(struct sockaddr *)&sa,sizeof(sa)))
- {
- send(cd->sdc.s,httpresp602,strlen(httpresp602),0);
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-5;
- }
- else
- {
- if(-1!=cd->log)
- {
- strcpy(logs->domainname,uri);
- }
- }
-
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==cd->cmd)
- {
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //发送请求
- if(isTunnel)
- {
- if(SOCKET_ERROR==send(cd->sdc.s,tunnelresponse,strlen(tunnelresponse),0))
- {
- send(cd->sdc.s,httpresp603,strlen(httpresp603),0);
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-6;
- }
- }
- if(SOCKET_ERROR==send(cd->sdr.s,cd->sdc.buf,cd->sdc.bufcount,0))
- {
- send(cd->sdc.s,httpresp603,strlen(httpresp603),0);
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-6;
- }
- //记录字节数
- if(-1!=cd->log)
- {
- logs->bytecount+=cd->sdc.bufcount;
- }
- ///
- TraceData(servicedata->isDataTrace,&servicedata->memlogfile.logdatatrace[cd->log].dc,cd->sdc.buf,cd->sdc.bufcount);
-
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==cd->cmd)
- {
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- logs->state=LOGSTRUCT_STATE_NOUSE;
- }
- cd->state=CONNECTION_NOUSE;
- return (DWORD)-1;
- }
-
- //接收数据并发给客户
- TransData(cd->sdr.s,cd->sdc.s,cd->sdr.buf,BUFFERSIZE,&cd->cmd,&cd->sdr.bufcount,servicedata,cd);
- //记录字节数
- if(-1!=cd->log)
- {
- logs->bytecount+=cd->sdr.bufcount;
- }
-
- closesocket(cd->sdc.s);
- closesocket(cd->sdr.s);
- if(-1!=cd->log)
- {
- time(&logs->timeend);
- logs->state=LOGSTRUCT_STATE_USED;
- }
- cd->state=CONNECTION_NOUSE;
- return 1;
- }
-
- //接收HTTP请求(如果出错,不执行closesocket())
- int RecvHttpRequest(SOCKET s,char * buf,int buflen,int * cmd,int* headlen,BOOL islimitpost,int maxpost)
- {
- maxpost+=1;
-
- const char CRLF[]="\x0d\x0a";
- const char CRLFCRLF[]="\x0d\x0a\x0d\x0a";
- const char CONTENTLENGTH[]="Content-Length:";
-
- int recvcount=0;
- int temp;
- int recvall=0;
-
- BOOL tempbool;
- struct timeval timeout;
- timeout.tv_sec=0;
- timeout.tv_usec=100000;
-
-
- for(;1;)
- {
- //退出?
- if(CONNECTIONDATA_CMD_QUIT==*cmd)
- {
- return -1;
- }
- if(1!=IsSocketReadReady(s,timeout,tempbool))
- {
- return -2;
- }
- if(tempbool)
- {
- recvcount=recv(s,buf+recvall,buflen-recvall,0);
- }
- else
- {
- continue;
- }
- if(SOCKET_ERROR==recvcount)
- {
- return -3;
- }
- else if(0==recvcount)
- {
- return -4;
- }
- recvall+=recvcount;
- //在使用后面代码段时使用CRLFCRLF,4,否则使用CRLF,2
- temp=mymemindex(buf,recvall,(char*)CRLFCRLF,4);
- if(-1!=temp)
- {
- *headlen=temp;
- break;
- }
- }
-
- if(islimitpost && -1!=(temp=mymemindex(buf,*headlen,(char*)CONTENTLENGTH,15)))
- {
- long i;
- char len[10];
-
- if(-1==(i=mymemindex(buf+temp,buflen-temp,(char*)CRLF,2)))
- {
- return -5;
- }
- i-=strlen(CONTENTLENGTH);
- if(i>9)
- {
- return -6;
- }
- memcpy(len,buf+temp+strlen(CONTENTLENGTH),i);
- len[i]='\0';
- i=atoi(len);
- if(i>maxpost)
- {
- return -7;
- }
- }
-
- return recvall;
- }
-
- //取得URL
- int GetHttpURL(char* buf,int * buflenall,int buflen,char * host,int hostbuflen,int * port,char * uri,int uribuflen)
- {
- const char CRLF[]="\x0d\x0a";
-
- int urlstart,urlend;
- int hoststart,hostend,hostlen;
- int portstart,portend,portlen;
- int pos;
- char str[10];
-
- urlend=mymemindex(buf,buflen,(char*)CRLF,2);
- if(-1==(urlstart=mymemindex(buf,urlend,"http://",7)))
- {
- return -2;
- }
- if(urlend-urlstart>=uribuflen)
- {
- memcpy(uri,buf+urlstart,uribuflen-1);
- uri[uribuflen-1]='\0';
- }
- else
- {
- memcpy(uri,buf+urlstart,urlend-urlstart);
- uri[urlend-urlstart]='\0';
- }
-
- //得到主机名起始位置
- hoststart=urlstart+7;
-
- if(-1==(pos=mymemindex(buf+hoststart,urlend-hoststart,"/",1)))
- {
- return -3;
- }
- portend=pos+hoststart;
- pos=mymemindex(buf+hoststart,portend-hoststart,":",1);
- if(-1!=pos)//有端口
- {
- portstart=pos+hoststart+1;//得到端口起始位置
- hostend=pos+hoststart;
- portlen=portend-portstart;
- memcpy(str,buf+portstart,portlen);
- str[portlen]='\0';
- if(0==portlen) *port=80;//若端口长度为零,实际上无端口
- {
- if(0==(*port=atoi(str)))
- return -4;
- }
- }
- else//无端口
- {
- *port=80;
- hostend=portend;
- }
- hostlen=hostend-hoststart;
- if(hostlen>=hostbuflen)
- return -5;
- memcpy(host,buf+hoststart,hostlen);
- host[hostlen]='\0';
-
- //HTTP请求处理
- long i;
-
- //降版本1.1为1.0
- if('1'==buf[urlend-1])
- {
- buf[urlend-1]='0';
- }
- //擦去URL
- i=portend-urlstart;
- memmove(buf+urlstart,buf+portend,*buflenall-portend);
- *buflenall-=i;
-
- return hostlen;
- }
-
- //取得隧道请求
- int GetTunnelURL(char* buf,int * buflenall,int buflen,char * host,int hostbuflen,int * port,char * uri,int uribuflen)
- {
- const char CRLF[]="\x0d\x0a";
-
- int urlstart,urlend;
- int hoststart,hostend,hostlen;
- int portstart,portend,portlen;
- int pos;
- char str[10];
-
- urlend=mymemindex(buf,buflen,(char*)CRLF,2);
- if(buflen<8 || 0!=memcmp(buf,"CONNECT",7))
- return -2;
- if(' '!=buf[7])
- return -2;
- for(urlstart=8;urlstart
- {
- if(' '!=buf[urlstart])
- break;
- }
-
- if(urlend>=uribuflen)
- {
- memcpy(uri,buf,uribuflen-1);
- uri[uribuflen-1]='\0';
- }
- else
- {
- memcpy(uri,buf,urlend);
- uri[urlend]='\0';
- }
-
- //得到主机名起始位置
- hoststart=urlstart;
-
- if(-1==(pos=mymemindex(buf+hoststart,urlend-hoststart,"/",1)))
- {
- return -3;
- }
- portend=pos+hoststart;
- pos=mymemindex(buf+hoststart,portend-hoststart,":",1);
- if(-1!=pos)//有端口
- {
- portstart=pos+hoststart+1;//得到端口起始位置
- hostend=pos+hoststart;
- portlen=portend-portstart;
- memcpy(str,buf+portstart,portlen);
- str[portlen]='\0';
- if(0==portlen) *port=80;//若端口长度为零,实际上无端口
- {
- if(0==(*port=atoi(str)))
- return -4;
- }
- }
- else//无端口
- {
- *port=80;
- hostend=portend;
- }
- hostlen=hostend-hoststart;
- if(hostlen>=hostbuflen)
- return -5;
- memcpy(host,buf+hoststart,hostlen);
- host[hostlen]='\0';
-
- //HTTP请求处理
-
- *buflenall=0;
-
- return hostlen;
- }
-
//Remove proxy-only header lines from a request, in place.
//  buf        request data (request line, header lines, optional body)
//  buflenall  in/out: number of valid bytes in buf
//Every header line whose name starts with "Proxy-" (compared without regard
//to case, e.g. Proxy-Connection, Proxy-Authorization) is erased together with
//the CRLF in front of it; the following data is moved forward and *buflenall
//is reduced. Scanning stops at the blank line that ends the header, so the
//request line and the body are never touched.
//Always returns 1.
int ClearProxyInfo(char * buf,int * buflenall)
{
    const char PREFIX[]="proxy-";
    const int PREFIXLEN=6;
    int total=*buflenall;
    int linestart=0;
    int lineend,k;

    while(linestart<total)
    {
        //locate the CRLF that ends the line starting at linestart
        lineend=linestart;
        while(lineend+1<total && !('\x0d'==buf[lineend] && '\x0a'==buf[lineend+1]))
        {
            lineend++;
        }
        if(lineend+1>=total)
        {
            lineend=total;//last line without CRLF
        }

        if(linestart>0)//the first line is the request line, never a header
        {
            if(lineend==linestart)
            {
                break;//blank line: end of the header
            }
            if(lineend-linestart>=PREFIXLEN)
            {
                for(k=0;k<PREFIXLEN;k++)
                {
                    char c=buf[linestart+k];
                    if(c>='A' && c<='Z')
                        c=(char)(c-'A'+'a');
                    if(c!=PREFIX[k])
                        break;
                }
                if(PREFIXLEN==k)
                {
                    //erase the preceding CRLF plus this line; the CRLF that ended
                    //this line now terminates the previous one, so the next line
                    //begins at the same linestart
                    memmove(buf+linestart-2,buf+lineend,total-lineend);
                    total-=lineend-linestart+2;
                    continue;
                }
            }
        }
        linestart=lineend+2;
    }

    *buflenall=total;
    return 1;
}
主要就是这么几件事:取出目标地址和端口,擦除目标信息,降低版本为1.0,擦除Proxy-XXXX头标,连接目标,双向转发数据。
这个代码是从实际项目中截取出来的。
(这里是结束)