Proxy实验

本实验的所有内容均在proxy.c文件中完成。

感谢hankeke303 大佬提供的思路！

Part I：实现顺序的代理服务器

实现顺序的代理服务器实际上就是一个tiny服务器的复刻。

`main`函数

main函数参考书上 P672 的实例：

现在的代理服务器只支持GET方法。如果客户端请求其他方法（如POST），我们发送给它一个错误信息，并返回到主程序，主程序随后关闭连接并等待下一个连接请求。否则，读并且忽略任何请求报头。

 1int main(int argc, char **argv)
 2{
 3    int listenfd, connfd;
 4    char hostname[MAXLINE], port[MAXLINE];
 5    socklen_t clientlen;
 6    struct sockaddr_storage clientaddr;
 7
 8
 9    // 检查命令输入是否正确
10    if (argc != 2) {
11        fprintf(stderr, "usage: %s <port> \n", argv[0]);
12        exit(1);
13    }
14
15    listenfd = Open_listenfd(argv[1]);
16    // 主循环
17    while (1) {
18        clientlen = sizeof(clientaddr);
19        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
20
21        Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLINE, 
22                    port, MAXLINE, 0);
23
24        printf("Accepted connection from (%s, %s)\n", hostname, port);
25    }
26
27    printf("%s", user_agent_hdr);
28    return 0;
29}

解析URL

首先在程序中定义一个URL结构体，存放一个URL的host、端口和路径。

1// URL 结构体
2typedef struct {
3    char host[MAXLINE];
4    char port[MAXLINE];
5    char path[MAXLINE];
6} URL;

对于一个传入的URL字符串，首先忽略掉 http:// 这种协议标识，可以通过查找//字符串来实现。

然后，我们找到第一个/字符，在这个字符后面的就是路径了。如果找不到这个字符，我们就认为访问的是根路径/。随后我们将这个字符设置为\0，方便之后端口的读取。我们通过:字符来定位端口，:后面的就是端口，如果找不到: 那么就认为端口是80，然后我们将这个字符设置为\0。

最后，剩下的字符串就是host了。

 1void parseURL(char *s, URL *url) {
 2    // 忽略"http"，查找 "//" 字符串
 3    char *ptr = strstr(s, "//");
 4
 5    if (ptr != NULL) {
 6        s = ptr+2;
 7    }
 8
 9    // 找到第一个 '/' 字符，其后为路径
10    ptr = strchr(s, '/');
11    if (ptr != NULL) {
12        strcpy(url->path, ptr);
13        *ptr = '\0';    // 在原字符串中截断路径部分
14    }
15
16    // ':' 之后为端口号
17    ptr = strchr(s, ':');
18    if (ptr != NULL) {
19        strcpy(url->port, ptr + 1);
20        *ptr = '\0';
21    }
22    else {
23        // 默认端口为80
24        strcpy(url->port, "80");
25    }
26
27    strcpy(url->host, s);
28}

以http://www.example.com:8080/path/to/file为例：

步骤	操作
查找`//`子字符串	指针指向`www.example.com:8080/path`
查找第一个`/`字符	`ptr`指向`/`，`s`变为`www.example.com:8080`，`url->path`得到`/path/to/file`
查找`:`字符	`ptr`指向`:`，`s`变为`www.example.com`，`url->port`得到`8080`
获取主机名	剩余部分就是主机名，复制到`url->host`，`url->host`得到`www.example.com`

函数说明：

strchr是C语言标准库<string.h>中的一个函数，用于在字符串中查找指定字符的第一次出现位置。如果找到了指定字符，返回该字符在字符串中第一次出现的位置的指针；如果没有找到指定字符，返回NULL。

strcpy是C语言标准库<string.h>中的一个函数，用于将一个字符串复制到另一个字符串中。它会复制源字符串的内容（包括字符串的结束符\0），返回目标字符串的起始地址。

`readClient`函数

readClient函数会从客户端读取请求的全部内容，并生成即将发送到服务端的新请求。

首先，从第一行读取URL扔给parseURL函数提取。

然后，设置默认的Hosts header为URL中的host。接着从接下来读取到的headers中，如果是Hosts那么就更新Hosts，如果是User-Agent、Connection和Proxy-Connection，那么就忽略（因为我们有我们要设定的专属header）。其余的header收集起来，稍后一并作为新请求发过去。

最后，向新请求字符串中写入我们生成的HTTP头和headers。

 1void readClient(rio_t *rio, URL *url, char *data) {
 2    char host[MAXLINE];
 3    char line[MAXLINE];
 4    char other[MAXLINE];
 5    char method[MAXLINE], urlstr[MAXLINE], version[MAXLINE];
 6 
 7    Rio_readlineb(rio, line, MAXLINE);
 8    sscanf(line, "%s %s %s\n", method, urlstr, version);
 9    parseURL(urlstr, url);
10 
11    sprintf(host, "Host: %s\r\n", url->host);
12    while (Rio_readlineb(rio, line, MAXLINE) > 0) {
13        if (strcmp(line, "\r\n") == 0) {
14            break;
15        }
16        if (strncmp(line, "Host", 4) == 0) {
17            strcpy(host, line);
18        }
19        if (strncmp(line, "User-Agent", 10) && strncmp(line, "Connection", 10) 
20            && strncmp(line, "Proxy-Connection", 16)) {
21            strcat(other, line);
22        }
23    }
24    
25    sprintf(data, "%s %s HTTP/1.0\r\n"
26                "%s%s"
27                "Connection: close\r\n"
28                "Proxy-Connection: close\r\n"
29                "%s\r\n", method, url->path, host, user_agent_hdr, other);
30}

`doit`函数

doit函数的功能是处理成功连接的套接字接口。

 1void doit(int connfd) {
 2    rio_t rio;
 3    char line[MAXLINE];
 4    Rio_readinitb(&rio, connfd);
 5    
 6    URL url;
 7    char data[MAXLINE];
 8    readClient(&rio, &url, data);
 9
10    int serverfd = open_clientfd(url.host, url.port);
11    if (serverfd < 0) {
12        printf("Connection failed!\n");
13    }
14    
15    rio_readinitb(&rio, serverfd);
16    Rio_writen(serverfd, data, strlen(data));
17    
18    int len;
19    while ((len = Rio_readlineb(&rio, line, MAXLINE)) > 0) {
20        Rio_writen(connfd, line, len);
21    }
22    
23    Close(serverfd);
24}

Part II：处理多个并发请求

这一部分的任务是实现处理并发请求。

`main`函数

参考书上 P709 的实现。

 1int main(int argc, char **argv)
 2{
 3    int listenfd, connfd;
 4    char hostname[MAXLINE], port[MAXLINE];
 5    socklen_t clientlen;
 6    struct sockaddr_storage clientaddr;
 7
 8    // 引入线程
 9    pthread_t tid;
10
11    // 检查命令输入是否正确
12    if (argc != 2) {
13        fprintf(stderr, "usage: %s <port> \n", argv[0]);
14        exit(1);
15    }
16
17    // 创建线程
18    sbuf_init(&sbuf, SBUFSIZE);
19    for (int i = 0; i < NTHREADS; i++) {
20        Pthread_create(&tid, NULL, thread, NULL);
21    }
22
23    listenfd = Open_listenfd(argv[1]);
24    // 主循环
25    while (1) {
26        clientlen = sizeof(clientaddr);
27        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
28
29        Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLINE, 
30                    port, MAXLINE, 0);
31
32        printf("Accepted connection from (%s, %s)\n", hostname, port);
33        // 在循环内部不再是直接执行，而是加入缓冲区
34        sbuf_insert(&sbuf, connfd);
35    }
36    
37    printf("%s", user_agent_hdr);
38    return 0;
39}

同时加入线程执行函数：

1// 线程执行函数
2void *thread(void *vargp) {
3    Pthread_detach(pthread_self());
4    while (1) {
5        int connfd = sbuf_remove(&sbuf);
6        doit(connfd);
7        Close(connfd);
8    }
9}

`SBUF`包

SBUF用于构造生产者-消费者程序，具体实现参考书上 P705-P706 的相关内容。

 1// SBUF 结构体
 2typedef struct {
 3    int *buf;          /* Buffer array */         
 4    int n;             /* Maximum number of slots */
 5    int front;         /* buf[(front+1)%n] is first item */
 6    int rear;          /* buf[rear%n] is last item */
 7    sem_t mutex;       /* Protects accesses to buf */
 8    sem_t slots;       /* Counts available slots */
 9    sem_t items;       /* Counts available items */
10} sbuf_t;
11
12/* Create an empty, bounded, shared FIFO buffer with n slots */
13/* $begin sbuf_init */
14void sbuf_init(sbuf_t *sp, int n)
15{
16    sp->buf = Calloc(n, sizeof(int)); 
17    sp->n = n;                       /* Buffer holds max of n items */
18    sp->front = sp->rear = 0;        /* Empty buffer iff front == rear */
19    Sem_init(&sp->mutex, 0, 1);      /* Binary semaphore for locking */
20    Sem_init(&sp->slots, 0, n);      /* Initially, buf has n empty slots */
21    Sem_init(&sp->items, 0, 0);      /* Initially, buf has zero data items */
22}
23/* $end sbuf_init */
24
25/* Clean up buffer sp */
26/* $begin sbuf_deinit */
27void sbuf_deinit(sbuf_t *sp)
28{
29    Free(sp->buf);
30}
31/* $end sbuf_deinit */
32
33/* Insert item onto the rear of shared buffer sp */
34/* $begin sbuf_insert */
35void sbuf_insert(sbuf_t *sp, int item)
36{
37    P(&sp->slots);                          /* Wait for available slot */
38    P(&sp->mutex);                          /* Lock the buffer */
39    sp->buf[(++sp->rear)%(sp->n)] = item;   /* Insert the item */
40    V(&sp->mutex);                          /* Unlock the buffer */
41    V(&sp->items);                          /* Announce available item */
42}
43/* $end sbuf_insert */
44
45/* Remove and return the first item from buffer sp */
46/* $begin sbuf_remove */
47int sbuf_remove(sbuf_t *sp)
48{
49    int item;
50    P(&sp->items);                          /* Wait for available item */
51    P(&sp->mutex);                          /* Lock the buffer */
52    item = sp->buf[(++sp->front)%(sp->n)];  /* Remove the item */
53    V(&sp->mutex);                          /* Unlock the buffer */
54    V(&sp->slots);                          /* Announce available slot */
55    return item;
56}

Part III：缓存Web对象

第3个任务就是实现缓存一些网页对象。

具体地，当我们的代理访问了一个服务器网页的时候，我们需要将这个网页缓存下来，在之后的请求中就不需要再次从服务器那里请求这个网页了。

但是，缓存的大小不是无限的，这就需要我们在缓存使用满的时候驱逐一部分已经缓存的网页出去。本实验要求我们使用LRU（最近最少使用）的方法，也就是找到上一次访问时间最远的对象替换掉。

`main`函数

 1int main(int argc, char **argv)
 2{
 3    int listenfd, connfd;
 4    char hostname[MAXLINE], port[MAXLINE];
 5    socklen_t clientlen;
 6    struct sockaddr_storage clientaddr;
 7
 8    // 引入线程
 9    pthread_t tid;
10
11    // 检查命令输入是否正确
12    if (argc != 2) {
13        fprintf(stderr, "usage: %s <port> \n", argv[0]);
14        exit(1);
15    }
16
17    // 创建线程
18    sbuf_init(&sbuf, SBUFSIZE);
19    for (int i = 0; i < NTHREADS; i++) {
20        Pthread_create(&tid, NULL, thread, NULL);
21    }
22    // 初始化缓存
23    initCache();
24
25    listenfd = Open_listenfd(argv[1]);
26    // 主循环
27    while (1) {
28        clientlen = sizeof(clientaddr);
29        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
30
31        Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLINE, 
32                    port, MAXLINE, 0);
33
34        printf("Accepted connection from (%s, %s)\n", hostname, port);
35        // 在循环内部不再是直接执行，而是加入缓冲区
36        sbuf_insert(&sbuf, connfd);
37    }
38
39    printf("%s", user_agent_hdr);
40    return 0;
41}

读者-写者模型

读者-写者模型实现了多个读者可以同时读取，但是读-写、写-写会互斥。在本实验中采用的是读者优先的策略。

 1// 读者-写者模型
 2void readBegin(Cache *c) {
 3    P(&c->mutex);
 4    if (++c->read_cnt == 1) P(&c->w);
 5    V(&c->mutex);
 6}
 7
 8void readEnd(Cache *c) {
 9    P(&c->mutex);
10    if (--c->read_cnt == 0) V(&c->w);
11    V(&c->mutex);
12}
13
14void writeBegin(Cache *c) {
15    P(&c->w);
16}
17
18void writeEnd(Cache *c) {
19    V(&c->w);
20}

缓存相关

缓存结构

1typedef struct Cache {
2    bool empty;                 // 是否为空
3    URL url;                    // 缓存 URL
4    char data[MAX_OBJECT_SIZE]; // 缓存内容
5    int lru;                    // 上次访问时间
6    int read_cnt;
7    sem_t mutex, w;             // 读者-写者模型相关信号量
8} Cache;

缓存函数

初始化缓存

1// 缓存初始化
2void initCache() {
3    for (int i = 0; i < MAX_CACHE; ++i) {
4        cache[i].empty = 1;
5        Sem_init(&cache[i].mutex, 0, 1);
6        Sem_init(&cache[i].w, 0, 1);
7    }
8}

寻找缓存：枚举每一个缓存，判断是否为空，如果不为空判断URL 是否相等。

 1Cache *getCache(URL *url) {
 2    Cache *ans = NULL;
 3    for (int i = 0; i < MAX_CACHE && ans == NULL; ++i) {
 4        readBegin(&cache[i]);
 5        if (!cache[i].empty && urlEqual(&cache[i].url, url)) {
 6            ans = &cache[i];
 7        }
 8        readEnd(&cache[i]);
 9    }
10    if (ans != NULL) {
11        updateLRU(ans);
12    }
13    return ans;
14}

插入缓存

 1// 插入缓存
 2void insCache(URL *url, char *data) {
 3    Cache *pos = NULL;
 4    for (int i = 0; i < MAX_CACHE && pos == NULL; ++i) {
 5        readBegin(&cache[i]);
 6        if (cache[i].empty) {
 7            pos = &cache[i];
 8        }
 9        readEnd(&cache[i]);
10    }
11    // fprintf(stderr, "insCache: pos = %#p\n", pos);
12    if (pos != NULL) {
13        fillCache(pos, url, data);
14        return;
15    }
16    
17    int minLRU = __INT_MAX__;
18    for (int i = 0; i < MAX_CACHE; ++i) {
19        readBegin(&cache[i]);
20        if (!cache[i].empty && cache[i].lru < minLRU) {
21            minLRU = cache[i].lru;
22            pos = &cache[i];
23        }
24        readEnd(&cache[i]);
25    }
26    fillCache(pos, url, data);
27}

更新 LRU

1// 更新 LRU
2void updateLRU(Cache *c) {
3    static int clock = 0;
4    writeBegin(c);
5    c->lru = ++clock;
6    writeEnd(c);
7}

装载缓存

1// 装载缓存
2void fillCache(Cache *c, URL *url, char *data) {
3    writeBegin(c);
4    c->empty = 0;
5    urlCopy(&c->url, url);
6    strcpy(c->data, data);
7    writeEnd(c);
8    updateLRU(c);
9}

辅助函数，用来判断URL是否相等和复制URL

 1bool urlEqual(const URL *a, const URL *b) {
 2    return strcmp(a->host, b->host) == 0 &&
 3           strcmp(a->port, b->port) == 0 &&
 4           strcmp(a->path, b->path) == 0;
 5}
 6
 7void urlCopy(URL *a, const URL *b) {
 8    strcpy(a->host, b->host);
 9    strcpy(a->port, b->port);
10    strcpy(a->path, b->path);
11}

实验总结

本次的Proxy实验可以看作是对I/O、网络编程和并发编程的一次综合运用。通过实现一个简单却具有重要实际应用价值的网络代理服务器帮助我们回顾了这三部分的基本原理和核心思想。我同时进一步认识到进程（线程）作为计算机系统最成功概念的意义。

在实验过程中，我进一步巩固了LRU算法，同时意识到只要程序涉及到线程的并发运行，就必须考虑到代码的健壮性，这对我今后的学习和实践都具有重要的价值。

Proxy实验

实现网络代理服务器

Part I：实现顺序的代理服务器

`main`函数

main函数

解析URL

`readClient`函数

`doit`函数

Part II：处理多个并发请求

`main`函数

引入线程的main函数

`SBUF`包

Part III：缓存Web对象

`main`函数

读者-写者模型

缓存相关

缓存结构

缓存函数

实验总结

Part I：实现顺序的代理服务器

main函数

main函数

解析URL

readClient函数

doit函数

Part II：处理多个并发请求

main函数

引入线程的main函数

SBUF包

Part III：缓存Web对象

main函数

读者-写者模型

缓存相关

缓存结构

缓存函数

实验总结

`main`函数

`readClient`函数

`doit`函数

`main`函数

`SBUF`包

`main`函数