On Thu, Aug 20, 2015 at 5:32 PM, Mariusz Szczepańczyk <mszczepanc...@gmail.com> wrote: > --- > configure | 3 + > libavformat/http.c | 194 > +++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 197 insertions(+) > > diff --git a/configure b/configure > index e67ddf6..401e041 100755 > --- a/configure > +++ b/configure > @@ -265,6 +265,7 @@ External library support: > --enable-libxcb-shm enable X11 grabbing shm communication [autodetect] > --enable-libxcb-xfixes enable X11 grabbing mouse rendering [autodetect] > --enable-libxcb-shape enable X11 grabbing shape rendering [autodetect] > + --enable-libxml2 enable HTML parsing via libxml2 [no] > --enable-libxvid enable Xvid encoding via xvidcore, > native MPEG-4/Xvid encoder exists [no] > --enable-libzmq enable message passing via libzmq [no] > @@ -1428,6 +1429,7 @@ EXTERNAL_LIBRARY_LIST=" > libxcb_shm > libxcb_shape > libxcb_xfixes > + libxml2 > libxvid > libzmq > libzvbi > @@ -5309,6 +5311,7 @@ enabled libx265 && require_pkg_config x265 > x265.h x265_api_get && > { check_cpp_condition x265.h "X265_BUILD >= 57" > || > die "ERROR: libx265 version must be >= 57."; } > enabled libxavs && require libxavs xavs.h xavs_encoder_encode > -lxavs > +enabled libxml2 && require_pkg_config libxml-2.0 libxml/parser.h > xmlInitParser > enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore > enabled libzmq && require_pkg_config libzmq zmq.h zmq_ctx_new > enabled libzvbi && require libzvbi libzvbi.h vbi_decoder_new -lzvbi > diff --git a/libavformat/http.c b/libavformat/http.c > index 1eb716b..df45958 100644 > --- a/libavformat/http.c > +++ b/libavformat/http.c > @@ -21,6 +21,10 @@ > > #include "config.h" > > +#if CONFIG_LIBXML2 > +#include <libxml/HTMLparser.h> > +#endif /* CONFIG_LIBXML2 */ > + > #if CONFIG_ZLIB > #include <zlib.h> > #endif /* CONFIG_ZLIB */ > @@ -54,6 +58,16 @@ typedef enum { > FINISH > }HandshakeState; > > +typedef struct AVIODirEntryQueueNode { > + struct AVIODirEntry *entry; > + struct 
AVIODirEntryQueueNode *next; > +} AVIODirEntryQueueNode; > + > +typedef struct AVIODirEntryQueue { > + struct AVIODirEntryQueueNode *front; > + struct AVIODirEntryQueueNode *rear; > +} AVIODirEntryQueue; > + > typedef struct HTTPContext { > const AVClass *class; > URLContext *hd; > @@ -70,6 +84,7 @@ typedef struct HTTPContext { > char *mime_type; > char *user_agent; > char *content_type; > + char *server; > /* Set if the server correctly handles Connection: close and will close > * the connection after feeding us the content. */ > int willclose; > @@ -111,6 +126,11 @@ typedef struct HTTPContext { > int is_multi_client; > HandshakeState handshake_step; > int is_connected_server; > +#if CONFIG_LIBXML2 > + htmlParserCtxtPtr html_parser; > + AVIODirEntryQueue *entry_queue; > + AVIODirEntry *entry; > +#endif /* CONFIG_LIBXML2 */ > } HTTPContext; > > #define OFFSET(x) offsetof(HTTPContext, x) > @@ -808,6 +828,8 @@ static int process_line(URLContext *h, char *line, int > line_count, > if (!strcmp(p, "close")) > s->willclose = 1; > } else if (!av_strcasecmp(tag, "Server")) { > + av_free(s->server); > + s->server = av_strdup(p); > if (!av_strcasecmp(p, "AkamaiGHost")) { > s->is_akamai = 1; > } else if (!av_strncasecmp(p, "MediaGateway", 12)) { > @@ -1409,6 +1431,7 @@ static int http_close(URLContext *h) > if (s->hd) > ffurl_closep(&s->hd); > av_dict_free(&s->chained_options); > + av_freep(&s->server); > return ret; > } > > @@ -1471,6 +1494,167 @@ static int http_get_file_handle(URLContext *h) > return ffurl_get_file_handle(s->hd); > } > > +#if CONFIG_LIBXML2 > +static void avio_dir_entry_queue_push(AVIODirEntryQueue *queue, AVIODirEntry > *entry) > +{ > + AVIODirEntryQueueNode *node; > + > + if (!queue) > + return; > + > + node = av_mallocz(sizeof(AVIODirEntryQueueNode)); > + node->entry = entry; > + if (!queue->front) { > + queue->front = queue->rear = node; > + } else { > + queue->rear->next = node; > + queue->rear = node; > + } > +} > + > +static AVIODirEntry 
*avio_dir_entry_queue_pop(AVIODirEntryQueue *queue) > +{ > + AVIODirEntry *entry; > + AVIODirEntryQueueNode *tmp; > + > + if (!queue || !queue->front) > + return NULL; > + > + tmp = queue->front; > + entry = queue->front->entry; > + if (queue->front == queue->rear) > + queue->front = queue->rear = NULL; > + else > + queue->front = queue->front->next; > + > + av_freep(&tmp); > + > + return entry; > +} > + > +static const char *get_attr(const xmlChar **attrs, const char *key) > +{ > + unsigned char i; > + > + if (!attrs) > + return NULL; > + > + for (i = 0; attrs[i] && i < UCHAR_MAX - 1; i += 2) { > + if (!strcmp(attrs[i], key)) > + return attrs[i + 1]; > + } > + > + return NULL; > +} > + > +static void parse_apache(void *ctx, const xmlChar *tag, const xmlChar > **attrs) > +{ > + URLContext *h = (URLContext *) ctx; > + HTTPContext *s = h->priv_data; > + const char *url, *alt, *src; > + if (!strcmp(tag, "img")) { > + av_freep(&s->entry); > + alt = get_attr(attrs, "alt"); > + src = get_attr(attrs, "src"); > + if (alt && alt[0] == '[' > + && alt[strlen(alt) - 1] == ']' > + && strcmp(alt, "[PARENTDIR]")) { > + if (!src || strcmp(src, "/icons/back.gif")) { > + s->entry = ff_alloc_dir_entry(); > + if (!strcmp(alt, "[DIR]")) > + s->entry->type = AVIO_ENTRY_DIRECTORY; > + else > + s->entry->type = AVIO_ENTRY_FILE; > + } > + } > + } else if (!strcmp(tag, "a")) { > + if (s->entry && (url = get_attr(attrs, "href")) > + && strcmp(url, "/")) { > + s->entry->name = av_strdup(url); > + if (s->entry->name[strlen(s->entry->name) - 1] == '/') > + s->entry->name[strlen(s->entry->name) - 1] = 0; > + avio_dir_entry_queue_push(s->entry_queue, s->entry); > + s->entry = NULL; > + } else > + av_freep(&s->entry); > + } else if (!strcmp(tag, "th") && s->entry) { > + av_freep(&s->entry); > + } > +} > + > +static int http_open_dir(URLContext *h) > +{ > + HTTPContext *s = h->priv_data; > + xmlSAXHandler handlers = {}; > + int ret; > + > + if (ret = http_open(h, h->filename, 0, NULL) < 0) > + goto 
fail; > + > + if (!s->mime_type || !strstr(s->mime_type, "text/html")) { > + ret = AVERROR(ENOSYS); > + goto fail; > + } > + > + if (s->server && strstr(s->server, "Apache")) > + handlers.startElement = parse_apache; > + > + if (!handlers.startElement) { > + ret = AVERROR(ENOSYS); > + goto fail; > + } > + > + s->entry_queue = av_mallocz(sizeof(AVIODirEntryQueue)); > + s->html_parser = htmlCreatePushParserCtxt(&handlers, h, NULL, 0, > h->filename, XML_CHAR_ENCODING_UTF8); > + if (!s->html_parser) { > + ret = AVERROR(EIO); > + goto fail; > + } > + > + return 0; > + > +fail: > + http_close(h); > + > + return ret; > +} > + > +static int http_read_dir(URLContext *h, AVIODirEntry **next) > +{ > + HTTPContext *s = h->priv_data; > + int ret; > + char buf[BUFFER_SIZE]; > + > + if ((*next = avio_dir_entry_queue_pop(s->entry_queue))) > + return 0; > + > + while ((ret = ffurl_read(h, (unsigned char *) buf, BUFFER_SIZE - 1)) > > 0) { > + htmlParseChunk(s->html_parser, (const char *) buf, ret, 0); > + if ((*next = avio_dir_entry_queue_pop(s->entry_queue))) > + return 0; > + } > + > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > +static int http_close_dir(URLContext *h) > +{ > + HTTPContext *s = h->priv_data; > + AVIODirEntry *entry; > + while (s->entry_queue && (entry = > avio_dir_entry_queue_pop(s->entry_queue))) > + av_freep(&entry); > + av_freep(&s->entry_queue); > + av_freep(&s->entry); > + htmlFreeParserCtxt(s->html_parser); > + s->html_parser = NULL; > + http_close(h); > + return 0; > +} > +#endif /* CONFIG_LIBXML2 */ > + > #define HTTP_CLASS(flavor) \ > static const AVClass flavor ## _context_class = { \ > .class_name = # flavor, \ > @@ -1493,6 +1677,11 @@ URLProtocol ff_http_protocol = { > .url_close = http_close, > .url_get_file_handle = http_get_file_handle, > .url_shutdown = http_shutdown, > +#if CONFIG_LIBXML2 > + .url_open_dir = http_open_dir, > + .url_read_dir = http_read_dir, > + .url_close_dir = http_close_dir, > +#endif /* CONFIG_LIBXML2 */ > 
.priv_data_size = sizeof(HTTPContext), > .priv_data_class = &http_context_class, > .flags = URL_PROTOCOL_FLAG_NETWORK, > @@ -1511,6 +1700,11 @@ URLProtocol ff_https_protocol = { > .url_close = http_close, > .url_get_file_handle = http_get_file_handle, > .url_shutdown = http_shutdown, > +#if CONFIG_LIBXML2 > + .url_open_dir = http_open_dir, > + .url_read_dir = http_read_dir, > + .url_close_dir = http_close_dir, > +#endif /* CONFIG_LIBXML2 */ > .priv_data_size = sizeof(HTTPContext), > .priv_data_class = &https_context_class, > .flags = URL_PROTOCOL_FLAG_NETWORK, > -- > 2.4.6
I don't like the commit message: this is not Apache. Perhaps what you meant was "similar to Apache" or something like that. I will check the actual patch later. > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel