Hi,

Ingo Schwarze wrote on Thu, May 24, 2018 at 09:15:29PM +0200:
> justina colmena wrote on Thu, May 24, 2018 at 05:54:45PM +0000:
>> On Wed, 23 May 2018 11:47:47 +0200 Marko Cupac wrote:

>>> I am sure OpenBSD will correct their errors in html/css code, if any,

>> Right now, https://man.openbsd.org/relayd.conf.5 fails html validation.
>> https://validator.w3.org/nu/?doc=https%3A%2F%2Fman.openbsd.org%2Frelayd.conf.5
>> There are several html <code> elements with duplicate IDs.

> Sure, that's on the TODO list:
[...]
> It's not the worst HTML syntax violation left in mandoc,
> and it's among the easier ones to fix.

Anyway, i just fixed it with the commit below (and also installed
the fix on man.openbsd.org).

[...]
> Actually, just skipping dupes may be better than suffixes because
> permalinks with suffixes don't make much sense.  As soon as someone
> inserts or deletes text, subsequent permalink anchors with suffixes
> might suddenly point to different places, which defeats the very
> purpose of permalinks.
> 
> Besides, the main virtue of mandoc permalinks is their simplicity,
> allowing people to type them by hand without even looking at the
> manual page first.  I just know that
> 
>   https://man.openbsd.org/cat#v
> 
> will work (and is harmful), without even testing it first.
> Appending suffixes would compromise that virtue.

After re-reading the discussion with Jakub, i came to the conclusion
that suffixes do *not* compromise that virtue -
  https://man.openbsd.org/cat#v
still works just like before even with suffixes because the first
occurrence does of course not get a suffix.

Generating the deduplication suffixes is simple, does not force
anybody to actually use them, but they may occasionally be useful
in some situations - probably not so much as links from a static
website to man.openbsd.org, but for example for quickly directing
somebody from a chat to a particular point in the current version
of a specific manual page:

  <person_1> i feel confused...  :-(
  <person_2> well i'm talking about
    https://man.openbsd.org/relayd.conf#forward_to_6
    not about
    https://man.openbsd.org/relayd.conf#forward_to_3

Yours,
  Ingo


Log Message:
-----------
Do not write duplicate id= attributes, they violate HTML syntax.
Append suffixes for disambiguation.  Issue first reported by Jakub
Klinkovsky <j dot l dot k at gmx dot com> (Arch Linux).

Modified Files:
--------------
    mandoc:
        TODO
        html.c
        html.h
        man_html.c
        mdoc_html.c

Revision Data
-------------
Index: html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.c,v
retrieving revision 1.228
retrieving revision 1.229
diff -Lhtml.c -Lhtml.c -u -p -r1.228 -r1.229
--- html.c
+++ html.c
@@ -22,6 +22,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <stdarg.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -29,6 +30,7 @@
 #include <unistd.h>
 
 #include "mandoc_aux.h"
+#include "mandoc_ohash.h"
 #include "mandoc.h"
 #include "roff.h"
 #include "out.h"
@@ -117,6 +119,9 @@ static      const char      *const roffscales[SCAL
        "ex", /* SCALE_FS */
 };
 
+/* Avoid duplicate HTML id= attributes. */
+static struct ohash     id_unique;
+
 static void     a2width(const char *, struct roffsu *);
 static void     print_byte(struct html *, char);
 static void     print_endword(struct html *);
@@ -144,6 +149,8 @@ html_alloc(const struct manoutput *outop
        if (outopts->fragment)
                h->oflags |= HTML_FRAGMENT;
 
+       mandoc_ohash_init(&id_unique, 4, 0);
+
        return h;
 }
 
@@ -152,15 +159,22 @@ html_free(void *p)
 {
        struct tag      *tag;
        struct html     *h;
+       char            *cp;
+       unsigned int     slot;
 
        h = (struct html *)p;
-
        while ((tag = h->tag) != NULL) {
                h->tag = tag->next;
                free(tag);
        }
-
        free(h);
+
+       cp = ohash_first(&id_unique, &slot);
+       while (cp != NULL) {
+               free(cp);
+               cp = ohash_next(&id_unique, &slot);
+       }
+       ohash_delete(&id_unique);
 }
 
 void
@@ -257,10 +271,12 @@ print_metaf(struct html *h, enum mandoc_
 }
 
 char *
-html_make_id(const struct roff_node *n)
+html_make_id(const struct roff_node *n, int unique)
 {
        const struct roff_node  *nch;
-       char                    *buf, *cp;
+       char                    *buf, *bufs, *cp;
+       unsigned int             slot;
+       int                      suffix;
 
        for (nch = n->child; nch != NULL; nch = nch->next)
                if (nch->type != ROFFT_TEXT)
@@ -277,6 +293,30 @@ html_make_id(const struct roff_node *n)
                if (*cp == ' ')
                        *cp = '_';
 
+       if (unique == 0)
+               return buf;
+
+       /* Avoid duplicate HTML id= attributes. */
+
+       bufs = NULL;
+       suffix = 1;
+       slot = ohash_qlookup(&id_unique, buf);
+       cp = ohash_find(&id_unique, slot);
+       if (cp != NULL) {
+               while (cp != NULL) {
+                       free(bufs);
+                       if (++suffix > 127) {
+                               free(buf);
+                               return NULL;
+                       }
+                       mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
+                       slot = ohash_qlookup(&id_unique, bufs);
+                       cp = ohash_find(&id_unique, slot);
+               }
+               free(buf);
+               buf = bufs;
+       }
+       ohash_insert(&id_unique, slot, buf);
        return buf;
 }
 
Index: html.h
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.h,v
retrieving revision 1.89
retrieving revision 1.90
diff -Lhtml.h -Lhtml.h -u -p -r1.89 -r1.90
--- html.h
+++ html.h
@@ -133,5 +133,5 @@ void                  print_eqn(struct html *, const s
 void             print_paragraph(struct html *);
 void             print_endline(struct html *);
 
-char            *html_make_id(const struct roff_node *);
+char            *html_make_id(const struct roff_node *, int);
 int              html_strlen(const char *);
Index: man_html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/man_html.c,v
retrieving revision 1.149
retrieving revision 1.150
diff -Lman_html.c -Lman_html.c -u -p -r1.149 -r1.150
--- man_html.c
+++ man_html.c
@@ -428,11 +428,10 @@ man_SH_pre(MAN_ARGS)
        char    *id;
 
        if (n->type == ROFFT_HEAD) {
-               id = html_make_id(n);
+               id = html_make_id(n, 1);
                print_otag(h, TAG_H1, "cTi", "Sh", id);
                if (id != NULL)
                        print_otag(h, TAG_A, "chR", "permalink", id);
-               free(id);
        }
        return 1;
 }
@@ -498,11 +497,10 @@ man_SS_pre(MAN_ARGS)
        char    *id;
 
        if (n->type == ROFFT_HEAD) {
-               id = html_make_id(n);
+               id = html_make_id(n, 1);
                print_otag(h, TAG_H2, "cTi", "Ss", id);
                if (id != NULL)
                        print_otag(h, TAG_A, "chR", "permalink", id);
-               free(id);
        }
        return 1;
 }
Index: TODO
===================================================================
RCS file: /home/cvs/mandoc/mandoc/TODO,v
retrieving revision 1.253
retrieving revision 1.254
diff -LTODO -LTODO -u -p -r1.253 -r1.254
--- TODO
+++ TODO
@@ -379,12 +379,9 @@ are mere guesses, and some may be wrong.
 
 --- HTML issues --------------------------------------------------------
 
-- duplicate names generate duplicate href="#..." anchor attributes
-  possibly use "#..._<N>" suffixes?
-  Jakub Klinkovsky <j dot l dot k at gmx dot com> 3 Oct 2017 21:23:36 +0200
-  see also the thread: gre(4): Rename duplicate sections
-  up to 20 Apr 2018 15:27:33 +0200
-  loc *  exist *  algo *  size *  imp ***
+- @media queries to reduce indentation on low-res displays
+  some mails in the Viewport for man.openbsd.org thread
+  e.g. Adam Thompson 24 May 2018 15:09:00 -0500
 
 - wrap Sh and Ss content into <div>
   Laura Morales <lauretas at mail dot com> 21 Apr 2018 18:10:48 +0200
Index: mdoc_html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mdoc_html.c,v
retrieving revision 1.303
retrieving revision 1.304
diff -Lmdoc_html.c -Lmdoc_html.c -u -p -r1.303 -r1.304
--- mdoc_html.c
+++ mdoc_html.c
@@ -502,7 +502,7 @@ cond_id(const struct roff_node *n)
             (n->parent->tok == MDOC_Xo &&
              n->parent->parent->prev == NULL &&
              n->parent->parent->parent->tok == MDOC_It)))
-               return html_make_id(n);
+               return html_make_id(n, 1);
        return NULL;
 }
 
@@ -513,11 +513,10 @@ mdoc_sh_pre(MDOC_ARGS)
 
        switch (n->type) {
        case ROFFT_HEAD:
-               id = html_make_id(n);
+               id = html_make_id(n, 1);
                print_otag(h, TAG_H1, "cTi", "Sh", id);
                if (id != NULL)
                        print_otag(h, TAG_A, "chR", "permalink", id);
-               free(id);
                break;
        case ROFFT_BODY:
                if (n->sec == SEC_AUTHORS)
@@ -537,11 +536,10 @@ mdoc_ss_pre(MDOC_ARGS)
        if (n->type != ROFFT_HEAD)
                return 1;
 
-       id = html_make_id(n);
+       id = html_make_id(n, 1);
        print_otag(h, TAG_H2, "cTi", "Ss", id);
        if (id != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
-       free(id);
        return 1;
 }
 
@@ -553,7 +551,6 @@ mdoc_fl_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Fl", id);
-       free(id);
 
        print_text(h, "\\-");
        if (!(n->child == NULL &&
@@ -573,7 +570,6 @@ mdoc_cm_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Cm", id);
-       free(id);
        return 1;
 }
 
@@ -882,7 +878,7 @@ mdoc_sx_pre(MDOC_ARGS)
 {
        char    *id;
 
-       id = html_make_id(n);
+       id = html_make_id(n, 0);
        print_otag(h, TAG_A, "cThR", "Sx", id);
        free(id);
        return 1;
@@ -1030,7 +1026,6 @@ mdoc_dv_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Dv", id);
-       free(id);
        return 1;
 }
 
@@ -1042,7 +1037,6 @@ mdoc_ev_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Ev", id);
-       free(id);
        return 1;
 }
 
@@ -1055,12 +1049,11 @@ mdoc_er_pre(MDOC_ARGS)
            (n->parent->tok == MDOC_It ||
             (n->parent->tok == MDOC_Bq &&
              n->parent->parent->parent->tok == MDOC_It)) ?
-           html_make_id(n) : NULL;
+           html_make_id(n, 1) : NULL;
 
        if (id != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Er", id);
-       free(id);
        return 1;
 }
 
@@ -1411,7 +1404,6 @@ mdoc_ic_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "cTi", "Ic", id);
-       free(id);
        return 1;
 }
 
@@ -1464,7 +1456,6 @@ mdoc_ms_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_SPAN, "cTi", "Ms", id);
-       free(id);
        return 1;
 }
 
@@ -1505,7 +1496,6 @@ mdoc_no_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_SPAN, "ci", "No", id);
-       free(id);
        return 1;
 }
 
@@ -1517,7 +1507,6 @@ mdoc_li_pre(MDOC_ARGS)
        if ((id = cond_id(n)) != NULL)
                print_otag(h, TAG_A, "chR", "permalink", id);
        print_otag(h, TAG_CODE, "ci", "Li", id);
-       free(id);
        return 1;
 }
 

Reply via email to