Hi Charles,

Thanks for sharing that; I'll check it out. As was mentioned, it seems ripe for integrating with browser capture. On that note, have you seen org-protocol-capture-html? For articles that are primarily text, I've been capturing them directly in Org format, but your package sounds good for capturing pages as-is.
By the way, you might want to consider integrating something like Readability or the Python package python-readability (aka readability-lxml) for reducing web pages to their primary content. It's worked out well in org-protocol-capture-html.

Here's some code I've been using to read and/or capture web pages from URLs on the clipboard:

#+BEGIN_SRC elisp
(require 'org)
(require 's)  ; for `s-trim'

(defun url-to-org-with-readability (url)
  "Get page content of URL with python-readability, convert to Org with Pandoc, and display in buffer."
  (let (title content new-buffer)
    (with-temp-buffer
      (unless (= 0 (call-process "python" nil '(t t) nil
                                 "-m" "readability.readability" "-u" url))
        (error "Python readability-lxml script failed: %s" (buffer-string)))
      ;; Get title from the "Title:" line of the script's output
      (goto-char (point-min))
      (setq title (buffer-substring-no-properties (search-forward "Title:")
                                                  (line-end-position)))
      ;; Convert the HTML to Org (newer Pandoc versions may need
      ;; "--wrap=none" instead of "--no-wrap")
      (unless (= 0 (call-process-region (point-min) (point-max) "pandoc" t t nil
                                        "--no-wrap" "-f" "html" "-t" "org"))
        (error "Pandoc failed"))
      (setq content (buffer-substring (point-min) (buffer-end 1))))
    ;; Make new buffer
    (setq new-buffer (generate-new-buffer title))
    (with-current-buffer new-buffer
      (insert (concat "* [[" url "][" title "]]\n\n"))
      (insert content)
      (org-mode)
      (goto-char (point-min))
      (org-cycle)
      (switch-to-buffer new-buffer))))

(defun read-url-with-org ()
  "Call `url-to-org-with-readability' on URL in kill ring."
  (interactive)
  (url-to-org-with-readability (car kill-ring)))

(defun org-capture-web-page-with-readability (&optional url)
  "Return string containing entire capture to be inserted in org-capture template."
  (let ((url (or url (car kill-ring)))
        ;; From org-insert-time-stamp
        (timestamp (format-time-string
                    (concat "[" (substring (cdr org-time-stamp-formats) 1 -1) "]")))
        title title-linked content)
    (with-temp-buffer
      (unless (= 0 (call-process "python" nil '(t t) nil
                                 "-m" "readability.readability" "-u" url))
        (error "Python readability-lxml script failed: %s" (buffer-string)))
      ;; Get title
      (goto-char (point-min))
      (setq title (buffer-substring-no-properties (search-forward "Title:")
                                                  (line-end-position)))
      (setq title-linked (concat "[[" url "][" title "]]"))
      (unless (= 0 (call-process-region (point-min) (point-max) "pandoc" t t nil
                                        "--no-wrap" "-f" "html" "-t" "org"))
        (error "Pandoc failed"))
      ;; Demote page headings in capture buffer to below the
      ;; top-level Org heading and "Article" 2nd-level heading
      (save-excursion
        (goto-char (point-min))
        (while (re-search-forward (rx bol (1+ "*") (1+ space)) nil t)
          (beginning-of-line)
          (insert "**")
          (end-of-line)))
      ;; Skip the "Title:" line and trim the rest
      (goto-char (point-min))
      (forward-line 1)
      (setq content (s-trim (buffer-substring (point) (buffer-end 1))))
      ;; Return capture for insertion
      (concat title-linked " :website:\n\n" timestamp "\n\n** Article\n\n" content))))

;; org-capture template
("wr" "Capture Web site with python-readability" entry
 (file "~/org/articles.org")
 "* %(org-capture-web-page-with-readability)")
#+END_SRC
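In case it helps, here's roughly how I wire that into my config; just a minimal sketch, assuming the functions above are already loaded (the "wr" key, the ~/org/articles.org path, and the C-c w binding are only the choices from my example, not anything required):

#+BEGIN_SRC elisp
;; Register the capture template once org-capture is loaded,
;; so `org-capture-templates' is defined before we modify it.
(with-eval-after-load 'org-capture
  (add-to-list 'org-capture-templates
               '("wr" "Capture Web site with python-readability" entry
                 (file "~/org/articles.org")
                 "* %(org-capture-web-page-with-readability)")))

;; Optional: bind the read-only command to a key (example binding)
(global-set-key (kbd "C-c w") #'read-url-with-org)
#+END_SRC

Then, after copying a URL, C-c w opens it as an Org buffer for reading, and M-x org-capture RET wr files it under the articles file.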