httpget.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /*
  6. * Author: Wan-Teh Chang
  7. *
  8. * Given an HTTP URL, httpget uses the GET method to fetch the file.
  9. * The fetched file is written to stdout by default, or can be
  10. * saved in an output file.
  11. *
  12. * This is a single-threaded program.
  13. */
  14. #include "prio.h"
  15. #include "prnetdb.h"
  16. #include "prlog.h"
  17. #include "prerror.h"
  18. #include "prprf.h"
  19. #include "prinit.h"
  20. #include <stdio.h>
  21. #include <string.h>
  22. #include <stdlib.h> /* for atoi */
  23. #define FCOPY_BUFFER_SIZE (16 * 1024)
  24. #define INPUT_BUFFER_SIZE 1024
  25. #define LINE_SIZE 512
  26. #define HOST_SIZE 256
  27. #define PORT_SIZE 32
  28. #define PATH_SIZE 512
  29. /*
  30. * A buffer for storing the excess input data for ReadLine.
  31. * The data in the buffer starts from (including) the element pointed to
  32. * by inputHead, and ends just before (not including) the element pointed
  33. * to by inputTail. The buffer is empty if inputHead == inputTail.
  34. */
  35. static char inputBuf[INPUT_BUFFER_SIZE];
  36. /*
  37. * inputBufEnd points just past the end of inputBuf
  38. */
  39. static char *inputBufEnd = inputBuf + sizeof(inputBuf);
  40. static char *inputHead = inputBuf;
  41. static char *inputTail = inputBuf;
  42. static PRBool endOfStream = PR_FALSE;
  43. /*
  44. * ReadLine --
  45. *
  46. * Read in a line of text, terminated by CRLF or LF, from fd into buf.
  47. * The terminating CRLF or LF is included (always as '\n'). The text
  48. * in buf is terminated by a null byte. The excess bytes are stored in
  49. * inputBuf for use in the next ReadLine call or FetchFile call.
  50. * Returns the number of bytes in buf. 0 means end of stream. Returns
  51. * -1 if read fails.
  52. */
  53. PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
  54. {
  55. char *dst = buf;
  56. char *bufEnd = buf + bufSize; /* just past the end of buf */
  57. PRBool lineFound = PR_FALSE;
  58. char *crPtr = NULL; /* points to the CR ('\r') character */
  59. PRInt32 nRead;
  60. loop:
  61. PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
  62. && inputTail <= inputBufEnd);
  63. while (lineFound == PR_FALSE && inputHead != inputTail
  64. && dst < bufEnd - 1) {
  65. if (*inputHead == '\r') {
  66. crPtr = dst;
  67. } else if (*inputHead == '\n') {
  68. lineFound = PR_TRUE;
  69. if (crPtr == dst - 1) {
  70. dst--;
  71. }
  72. }
  73. *(dst++) = *(inputHead++);
  74. }
  75. if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
  76. *dst = '\0';
  77. return dst - buf;
  78. }
  79. /*
  80. * The input buffer should be empty now
  81. */
  82. PR_ASSERT(inputHead == inputTail);
  83. nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
  84. if (nRead == -1) {
  85. *dst = '\0';
  86. return -1;
  87. } else if (nRead == 0) {
  88. endOfStream = PR_TRUE;
  89. *dst = '\0';
  90. return dst - buf;
  91. }
  92. inputHead = inputBuf;
  93. inputTail = inputBuf + nRead;
  94. goto loop;
  95. }
  96. PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
  97. {
  98. PRInt32 nBytes = inputTail - inputHead;
  99. if (nBytes == 0) {
  100. if (endOfStream) {
  101. return -1;
  102. } else {
  103. return 0;
  104. }
  105. }
  106. if ((PRInt32) bufSize < nBytes) {
  107. nBytes = bufSize;
  108. }
  109. memcpy(buf, inputHead, nBytes);
  110. inputHead += nBytes;
  111. return nBytes;
  112. }
  113. PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
  114. {
  115. char buf[FCOPY_BUFFER_SIZE];
  116. PRInt32 nBytes;
  117. while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
  118. if (PR_Write(out, buf, nBytes) != nBytes) {
  119. fprintf(stderr, "httpget: cannot write to file\n");
  120. return PR_FAILURE;
  121. }
  122. }
  123. if (nBytes < 0) {
  124. /* Input buffer is empty and end of stream */
  125. return PR_SUCCESS;
  126. }
  127. while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
  128. if (PR_Write(out, buf, nBytes) != nBytes) {
  129. fprintf(stderr, "httpget: cannot write to file\n");
  130. return PR_FAILURE;
  131. }
  132. }
  133. if (nBytes < 0) {
  134. fprintf(stderr, "httpget: cannot read from socket\n");
  135. return PR_FAILURE;
  136. }
  137. return PR_SUCCESS;
  138. }
  139. PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
  140. {
  141. PRInt32 nBytes;
  142. PRFileMap *outfMap;
  143. void *addr;
  144. char *start;
  145. PRUint32 rem;
  146. PRUint32 bytesToRead;
  147. PRStatus rv;
  148. PRInt64 sz64;
  149. LL_UI2L(sz64, size);
  150. outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
  151. PR_ASSERT(outfMap);
  152. addr = PR_MemMap(outfMap, LL_ZERO, size);
  153. if (addr == NULL) {
  154. fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
  155. PR_GetOSError());
  156. PR_CloseFileMap(outfMap);
  157. return PR_FAILURE;
  158. }
  159. start = (char *) addr;
  160. rem = size;
  161. while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
  162. start += nBytes;
  163. rem -= nBytes;
  164. }
  165. if (nBytes < 0) {
  166. /* Input buffer is empty and end of stream */
  167. return PR_SUCCESS;
  168. }
  169. bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
  170. while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
  171. start += nBytes;
  172. rem -= nBytes;
  173. bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
  174. }
  175. if (nBytes < 0) {
  176. fprintf(stderr, "httpget: cannot read from socket\n");
  177. return PR_FAILURE;
  178. }
  179. rv = PR_MemUnmap(addr, size);
  180. PR_ASSERT(rv == PR_SUCCESS);
  181. rv = PR_CloseFileMap(outfMap);
  182. PR_ASSERT(rv == PR_SUCCESS);
  183. return PR_SUCCESS;
  184. }
  185. PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
  186. char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
  187. {
  188. char *start, *end;
  189. char *dst;
  190. char *hostEnd;
  191. char *portEnd;
  192. char *pathEnd;
  193. if (strncmp(url, "http", 4)) {
  194. fprintf(stderr, "httpget: the protocol must be http\n");
  195. return PR_FAILURE;
  196. }
  197. if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
  198. fprintf(stderr, "httpget: malformed URL: %s\n", url);
  199. return PR_FAILURE;
  200. }
  201. start = end = url + 7;
  202. dst = host;
  203. hostEnd = host + hostSize;
  204. while (*end && *end != ':' && *end != '/') {
  205. if (dst == hostEnd - 1) {
  206. fprintf(stderr, "httpget: host name too long\n");
  207. return PR_FAILURE;
  208. }
  209. *(dst++) = *(end++);
  210. }
  211. *dst = '\0';
  212. if (*end == '\0') {
  213. PR_snprintf(port, portSize, "%d", 80);
  214. PR_snprintf(path, pathSize, "%s", "/");
  215. return PR_SUCCESS;
  216. }
  217. if (*end == ':') {
  218. end++;
  219. dst = port;
  220. portEnd = port + portSize;
  221. while (*end && *end != '/') {
  222. if (dst == portEnd - 1) {
  223. fprintf(stderr, "httpget: port number too long\n");
  224. return PR_FAILURE;
  225. }
  226. *(dst++) = *(end++);
  227. }
  228. *dst = '\0';
  229. if (*end == '\0') {
  230. PR_snprintf(path, pathSize, "%s", "/");
  231. return PR_SUCCESS;
  232. }
  233. } else {
  234. PR_snprintf(port, portSize, "%d", 80);
  235. }
  236. dst = path;
  237. pathEnd = path + pathSize;
  238. while (*end) {
  239. if (dst == pathEnd - 1) {
  240. fprintf(stderr, "httpget: file pathname too long\n");
  241. return PR_FAILURE;
  242. }
  243. *(dst++) = *(end++);
  244. }
  245. *dst = '\0';
  246. return PR_SUCCESS;
  247. }
  248. void PrintUsage(void) {
  249. fprintf(stderr, "usage: httpget url\n"
  250. " httpget -o outputfile url\n"
  251. " httpget url -o outputfile\n");
  252. }
  253. int main(int argc, char **argv)
  254. {
  255. PRHostEnt hostentry;
  256. char buf[PR_NETDB_BUF_SIZE];
  257. PRNetAddr addr;
  258. PRFileDesc *socket = NULL, *file = NULL;
  259. PRIntn cmdSize;
  260. char host[HOST_SIZE];
  261. char port[PORT_SIZE];
  262. char path[PATH_SIZE];
  263. char line[LINE_SIZE];
  264. int exitStatus = 0;
  265. PRBool endOfHeader = PR_FALSE;
  266. char *url;
  267. char *fileName = NULL;
  268. PRUint32 fileSize;
  269. if (argc != 2 && argc != 4) {
  270. PrintUsage();
  271. exit(1);
  272. }
  273. if (argc == 2) {
  274. /*
  275. * case 1: httpget url
  276. */
  277. url = argv[1];
  278. } else {
  279. if (strcmp(argv[1], "-o") == 0) {
  280. /*
  281. * case 2: httpget -o outputfile url
  282. */
  283. fileName = argv[2];
  284. url = argv[3];
  285. } else {
  286. /*
  287. * case 3: httpget url -o outputfile
  288. */
  289. url = argv[1];
  290. if (strcmp(argv[2], "-o") != 0) {
  291. PrintUsage();
  292. exit(1);
  293. }
  294. fileName = argv[3];
  295. }
  296. }
  297. if (ParseURL(url, host, sizeof(host), port, sizeof(port),
  298. path, sizeof(path)) == PR_FAILURE) {
  299. exit(1);
  300. }
  301. if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
  302. == PR_FAILURE) {
  303. fprintf(stderr, "httpget: unknown host name: %s\n", host);
  304. exit(1);
  305. }
  306. addr.inet.family = PR_AF_INET;
  307. addr.inet.port = PR_htons((short) atoi(port));
  308. addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
  309. socket = PR_NewTCPSocket();
  310. if (socket == NULL) {
  311. fprintf(stderr, "httpget: cannot create new tcp socket\n");
  312. exit(1);
  313. }
  314. if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
  315. fprintf(stderr, "httpget: cannot connect to http server\n");
  316. exitStatus = 1;
  317. goto done;
  318. }
  319. if (fileName == NULL) {
  320. file = PR_STDOUT;
  321. } else {
  322. file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
  323. 00777);
  324. if (file == NULL) {
  325. fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
  326. fileName, PR_GetError(), PR_GetOSError());
  327. exitStatus = 1;
  328. goto done;
  329. }
  330. }
  331. cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
  332. PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n")
  333. + (PRIntn) strlen(path));
  334. if (PR_Write(socket, buf, cmdSize) != cmdSize) {
  335. fprintf(stderr, "httpget: cannot write to http server\n");
  336. exitStatus = 1;
  337. goto done;
  338. }
  339. if (ReadLine(socket, line, sizeof(line)) <= 0) {
  340. fprintf(stderr, "httpget: cannot read line from http server\n");
  341. exitStatus = 1;
  342. goto done;
  343. }
  344. /* HTTP response: 200 == OK */
  345. if (strstr(line, "200") == NULL) {
  346. fprintf(stderr, "httpget: %s\n", line);
  347. exitStatus = 1;
  348. goto done;
  349. }
  350. while (ReadLine(socket, line, sizeof(line)) > 0) {
  351. if (line[0] == '\n') {
  352. endOfHeader = PR_TRUE;
  353. break;
  354. }
  355. if (strncmp(line, "Content-Length", 14) == 0
  356. || strncmp(line, "Content-length", 14) == 0) {
  357. char *p = line + 14;
  358. while (*p == ' ' || *p == '\t') {
  359. p++;
  360. }
  361. if (*p != ':') {
  362. continue;
  363. }
  364. p++;
  365. while (*p == ' ' || *p == '\t') {
  366. p++;
  367. }
  368. fileSize = 0;
  369. while ('0' <= *p && *p <= '9') {
  370. fileSize = 10 * fileSize + (*p - '0');
  371. p++;
  372. }
  373. }
  374. }
  375. if (endOfHeader == PR_FALSE) {
  376. fprintf(stderr, "httpget: cannot read line from http server\n");
  377. exitStatus = 1;
  378. goto done;
  379. }
  380. if (fileName == NULL || fileSize == 0) {
  381. FetchFile(socket, file);
  382. } else {
  383. FastFetchFile(socket, file, fileSize);
  384. }
  385. done:
  386. if (socket) {
  387. PR_Close(socket);
  388. }
  389. if (file) {
  390. PR_Close(file);
  391. }
  392. PR_Cleanup();
  393. return exitStatus;
  394. }