Hi gang,

So, for the past month or so I've had the idea floating around in my 
head that it'd be really nice to be able to send HTML from a browser 
through any program, and load the output. That way you aren't 
restricted to javascript for changing a page, and lots of 
interesting possibilities are opened up. For example link URL 
rewriting can be done simply with a sed script like:

sed 's|http://lwn.net|https://lwn.net|g'

The most interesting use I have planned for it is to write a much 
simpler version of Readability 
(http://code.google.com/p/arc90labs-readability/) in Go, though I 
haven't got around to writing it yet.

So anyway, attached is a patch for surf to bind a key to filter the 
downloaded HTML through another program. I confess it's my first 
time working with pipes in C, so it's quite possible the code 
could be improved (let me know!).

It would also be quite easy to automatically trigger some script for 
each page load (think adblocking), though I haven't done that yet, 
and doing it cleanly may get tricky (as the easiest way would be for 
webkit to load the page, then filter the HTML, then re-load the new 
HTML, but that's not too pretty.)

On a related note, what's the etiquette regarding adding patches to 
the website? Is it just an "I think this is useful, I'll add it" 
thing, or do you prefer it to be only patches lots of people have 
acked?

I look forward to hearing any comments,

Nick
diff -r dbb565b8d61c config.def.h
--- a/config.def.h      Fri Jun 25 09:42:58 2010 +0200
+++ b/config.def.h      Mon Aug 02 22:25:19 2010 +0100
@@ -37,4 +37,5 @@
     { MODKEY,               GDK_f,      spawn,      SETPROP("_SURF_FIND", "_SURF_FIND") },
     { MODKEY,               GDK_n,      find,       { .b = TRUE } },
     { MODKEY|GDK_SHIFT_MASK,GDK_n,      find,       { .b = FALSE } },
+    { MODKEY,               GDK_v,      filter,     { .v = ".surf/filter.sh" } },
 };
diff -r dbb565b8d61c surf.1
--- a/surf.1    Fri Jun 25 09:42:58 2010 +0200
+++ b/surf.1    Mon Aug 02 22:25:19 2010 +0100
@@ -89,6 +89,9 @@
 .TP
 .B Ctrl\-o
 show the sourcecode of the current page.
+.TP
+.B Ctrl\-v
+run the current page through a custom filter program.
 .SH SEE ALSO
 .BR dmenu(1)
 .BR xprop(1)
diff -r dbb565b8d61c surf.c
--- a/surf.c    Fri Jun 25 09:42:58 2010 +0200
+++ b/surf.c    Mon Aug 02 22:25:19 2010 +0100
@@ -2,6 +2,7 @@
  *
  * To understand surf, start reading main().
  */
+#define _POSIX_SOURCE /* keep glibc happy to use fdopen */
 #include <signal.h>
 #include <X11/X.h>
 #include <X11/Xatom.h>
@@ -76,6 +77,7 @@
 static void die(char *str);
 static void drawindicator(Client *c);
 static gboolean exposeindicator(GtkWidget *w, GdkEventExpose *e, Client *c);
+static void filter(Client *c, const Arg *arg);
 static void find(Client *c, const Arg *arg);
 static const char *getatom(Client *c, int a);
 static const char *getcookies(SoupURI *uri);
@@ -276,6 +278,129 @@
        webkit_web_view_search_text(c->view, s, FALSE, forward, TRUE);
 }
 
+/* Pipe the HTML source of the current page through the external program
+ * named by arg->v (resolved with buildpath()) and load whatever it writes to
+ * stdout back into the view, with the current URI as base URI. The program
+ * is passed the current URI as its only argument. Nothing happens if the
+ * page data is unavailable or the page is still loading.
+ */
+void
+filter(Client *c, const Arg *arg) {
+       GString *d;
+       WebKitWebFrame *frame = webkit_web_view_get_main_frame(c->view);
+       WebKitWebDataSource *source = webkit_web_frame_get_data_source(frame);
+       const char *baseuri = webkit_web_view_get_uri(c->view);
+       int iop[2], oip[2]; /* iop: surf -> filter, oip: filter -> surf */
+       pid_t pid, wpid;
+       FILE *o;
+       char buf[1024], *html, *tmp;
+       size_t len = 0, cap = sizeof(buf) + 1, n;
+
+       if((d = webkit_web_data_source_get_data(source)) == NULL
+                       || webkit_web_data_source_is_loading(source))
+               return;
+
+       if(pipe(oip))
+               return;
+       if(pipe(iop)) { /* don't leak the first pipe */
+               close(oip[0]);
+               close(oip[1]);
+               return;
+       }
+
+       pid = fork();
+       if(pid == -1) {
+               close(iop[0]);
+               close(iop[1]);
+               close(oip[0]);
+               close(oip[1]);
+               return;
+       }
+       if(pid == 0) { /* child: becomes the filter program */
+               char *path;
+
+               close(iop[1]);
+               if(iop[0] != STDIN_FILENO) {
+                       dup2(iop[0], STDIN_FILENO);
+                       close(iop[0]);
+               }
+               close(oip[0]);
+               if(oip[1] != STDOUT_FILENO) {
+                       dup2(oip[1], STDOUT_FILENO);
+                       close(oip[1]);
+               }
+               path = buildpath((char *)arg->v);
+               execlp(path, path, baseuri, (char *)NULL);
+               /* exec failed; _exit so the browser's stdio buffers and
+                * atexit handlers are not run a second time in the child */
+               _exit(127);
+       }
+
+       /* parent: these ends now belong to the filter only */
+       close(iop[0]);
+       close(oip[1]);
+
+       /* Feed the filter from a helper process so that we can read its
+        * output at the same time: writing everything first would deadlock
+        * once both pipes are full, and a filter that exits early could
+        * kill the browser with SIGPIPE.
+        * NOTE(review): neither child is waited for here; confirm that
+        * surf reaps its children elsewhere. */
+       wpid = fork();
+       if(wpid == 0) {
+               const char *p = d->str;
+               size_t left = d->len;
+               ssize_t w;
+
+               close(oip[0]);
+               while(left > 0 && (w = write(iop[1], p, left)) > 0) {
+                       p += w;
+                       left -= w;
+               }
+               /* the page is followed by a newline, as before */
+               if(left == 0 && write(iop[1], "\n", 1) != 1)
+                       left = 1;
+               _exit(left ? 1 : 0);
+       }
+       close(iop[1]); /* the filter sees EOF once the helper is done */
+       if(wpid == -1) {
+               close(oip[0]);
+               return;
+       }
+
+       /* collect the filter's output */
+       if(!(o = fdopen(oip[0], "r"))) {
+               close(oip[0]);
+               return;
+       }
+       if(!(html = malloc(cap))) {
+               fprintf(stderr, "malloc failed\n");
+               fclose(o);
+               return;
+       }
+       while((n = fread(buf, 1, sizeof(buf), o)) > 0) {
+               if(len + n + 1 > cap) {
+                       cap = (len + n + 1) * 2;
+                       /* keep html valid so it can be freed on failure */
+                       if(!(tmp = realloc(html, cap))) {
+                               fprintf(stderr, "realloc failed\n");
+                               free(html);
+                               fclose(o);
+                               return;
+                       }
+                       html = tmp;
+               }
+               memcpy(html + len, buf, n);
+               len += n;
+       }
+       fclose(o);
+       html[len] = '\0';
+
+       /* load html into browser */
+       webkit_web_view_load_string(c->view, html, NULL, NULL, baseuri);
+       free(html);
+}
+
 const char *
 getcookies(SoupURI *uri) {
        const char *c;

Attachment: pgpN44ApTlWKA.pgp
Description: PGP signature

Reply via email to