nickva commented on code in PR #5983:
URL: https://github.com/apache/couchdb/pull/5983#discussion_r3198607641


##########
src/couch_replicator/src/couch_replicator_dns.erl:
##########
@@ -0,0 +1,206 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_dns).
+
+-include_lib("ibrowse/include/ibrowse.hrl").
+
+-export([
+    init/0,
+    apply_dns_override/2
+]).
+
+-ifdef(TEST).
+-export([
+    parse_config/1,
+    match_pattern/2,
+    get_overrides/0,
+    resolve_host/1,
+    is_ip_address/1
+]).
+-endif.
+
+-type dns_override() :: {binary(), binary()}.
+
+-define(DNS_OVERRIDES_KEY, {?MODULE, dns_overrides}).
+
+%% Initialize DNS overrides cache
+-spec init() -> ok.
+init() ->
+    Overrides =
+        case config:get("replicator", "dns_overrides", undefined) of
+            undefined -> [];
+            ConfigStr -> parse_config(ConfigStr)
+        end,
+    persistent_term:put(?DNS_OVERRIDES_KEY, Overrides),
+    ok.
+
+-spec resolve_host(string()) -> {string(), string() | undefined}.
+resolve_host(Host) ->
+    case find_override(list_to_binary(Host), get_overrides()) of
+        {ok, Target} ->
+            {binary_to_list(Target), Host};
+        not_found ->
+            {Host, undefined}
+    end.
+
+-spec get_overrides() -> [dns_override()].
+get_overrides() ->
+    try
+        persistent_term:get(?DNS_OVERRIDES_KEY, [])

Review Comment:
   `persistent_term:get/2` will always succeed with a default value. To get an 
explicit `not_initialized` case we could use a marker like 
`persistent_term:get(?DNS_OVERRIDES_KEY, not_initialized)`, then initialize 
(save the persistent term) and return the parsed overrides.



##########
src/docs/src/config/replicator.rst:
##########
@@ -279,6 +279,25 @@ Replicator Database Configuration
         on error; however, in some cases it may be useful to prevent spending
         time attempting to call ``_bulk_get`` altogether.
 
+    .. config:option:: dns_overrides :: DNS overrides for replication requests
+
+        Comma delimited ``pattern:target`` mappings to use for replicator
+        requests. This is useful for cases where outbound HTTP requests must be
+        made through a transparent SNI proxy.
+
+        ``pattern`` may be either an exact hostname such as ``foo.bar.com`` or a
+        leading wildcard for subdomains such as ``*.example.test``. Wildcards
+        are supported only at the start of the pattern.
+
+        ``target`` may be an IPv4 address, a hostname, or a bracketed IPv6
+        address. Brackets avoid ambiguity with the ``:`` separator used between
+        the pattern and target.
+
+        For example::
+
+            [replicator]
+            dns_overrides = 
foo.bar.com:proxy.internal,*.example.test:127.0.0.1,[2001:db8::10]:[2001:db8::20]

Review Comment:
   Should the `[2001:db8::10]:[2001:db8::20]` example instead be something like 
`*.test_ip6.local:[2001:db8::20]`? IPv6 addresses are rejected as patterns.



##########
src/couch_replicator/src/couch_replicator_dns.erl:
##########
@@ -0,0 +1,206 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_dns).
+
+-include_lib("ibrowse/include/ibrowse.hrl").
+
+-export([
+    init/0,
+    apply_dns_override/2
+]).
+
+-ifdef(TEST).
+-export([
+    parse_config/1,
+    match_pattern/2,
+    get_overrides/0,
+    resolve_host/1,
+    is_ip_address/1
+]).
+-endif.
+
+-type dns_override() :: {binary(), binary()}.
+
+-define(DNS_OVERRIDES_KEY, {?MODULE, dns_overrides}).
+
+%% Initialize DNS overrides cache
+-spec init() -> ok.
+init() ->
+    Overrides =
+        case config:get("replicator", "dns_overrides", undefined) of
+            undefined -> [];
+            ConfigStr -> parse_config(ConfigStr)
+        end,
+    persistent_term:put(?DNS_OVERRIDES_KEY, Overrides),
+    ok.
+
+-spec resolve_host(string()) -> {string(), string() | undefined}.
+resolve_host(Host) ->
+    case find_override(list_to_binary(Host), get_overrides()) of
+        {ok, Target} ->
+            {binary_to_list(Target), Host};
+        not_found ->
+            {Host, undefined}
+    end.
+
+-spec get_overrides() -> [dns_override()].
+get_overrides() ->
+    try
+        persistent_term:get(?DNS_OVERRIDES_KEY, [])
+    catch
+        error:badarg ->
+            % not initialized yet, fall back to reading config
+            case config:get("replicator", "dns_overrides", undefined) of
+                undefined -> [];
+                ConfigStr -> parse_config(ConfigStr)
+            end
+    end.
+
+-spec parse_config(string()) -> [dns_override()].
+parse_config(ConfigStr) ->
+    ConfigBin = list_to_binary(ConfigStr),
+    Entries = binary:split(ConfigBin, <<",">>, [global, trim]),
+    lists:filtermap(fun parse_entry/1, Entries).
+
+% Note: IPv6 addresses in targets must be enclosed in brackets.
+% Format: pattern:target
+% Valid:   *.example.com:[2001:db8::1]
+% Invalid: [2001:db8::1]:proxy.internal (IPv6 as pattern not supported)
+parse_entry(<<>>) ->
+    false;
+parse_entry(Entry0) ->
+    Entry = string:trim(Entry0),
+    case binary:split(Entry, <<":">>) of
+        [Pattern0, Target0] ->
+            Pattern = string:trim(Pattern0),
+            Target = string:trim(Target0),
+            case {Pattern, Target} of
+                {<<>>, _} ->
+                    invalid_entry(Entry);
+                {_, <<>>} ->
+                    invalid_entry(Entry);
+                % Reject IPv6 addresses as patterns (they start with '[')
+                {<<"[", _/binary>>, _} ->
+                    invalid_entry_reason(Entry, "IPv6 addresses cannot be used 
as patterns");
+                _ ->
+                    {true, {Pattern, Target}}
+            end;
+        _ ->
+            invalid_entry(Entry)
+    end.
+
+invalid_entry(Entry) ->
+    couch_log:warning("Invalid dns_override entry: ~ts", [Entry]),
+    false.
+
+invalid_entry_reason(Entry, Reason) ->
+    couch_log:warning("Invalid dns_override entry: ~ts (~s)", [Entry, Reason]),
+    false.
+
+find_override(_Host, []) ->
+    not_found;
+find_override(Host, [{Pattern, Target} | Rest]) ->
+    case match_pattern(Host, Pattern) of
+        true ->
+            {ok, Target};
+        false ->
+            find_override(Host, Rest)
+    end.
+
+% DNS Override Pattern Matching
+%
+% Supports leading wildcard patterns only:
+%   - *.example.com matches any.subdomain.example.com
+%   - *.example.com does NOT match example.com (requires at least one 
subdomain)
+%
+% Not supported:
+%   - middle wildcards: sub.*.example.com
+%   - trailing wildcards: example.*
+%   - multiple wildcards: *.*.example.com
+-spec match_pattern(binary(), binary()) -> boolean().
+match_pattern(Host, Pattern) when is_binary(Host), is_binary(Pattern) ->
+    % DNS names are case-insensitive
+    HostLower = string:lowercase(Host),
+    PatternLower = string:lowercase(Pattern),
+    match_pattern_impl(HostLower, PatternLower).
+
+match_pattern_impl(Host, <<"*", Suffix/binary>>) ->
+    % wildcard match: extract last N bytes from Host and compare to Suffix
+    HostSize = byte_size(Host),
+    SuffixSize = byte_size(Suffix),
+    % ensure we have enough bytes before extracting suffix
+    case HostSize >= SuffixSize of
+        true ->
+            Pos = HostSize - SuffixSize,
+            binary:part(Host, Pos, SuffixSize) =:= Suffix;
+        false ->
+            false
+    end;
+match_pattern_impl(Host, Pattern) ->
+    Host =:= Pattern.
+
+-spec is_ip_address(string()) -> boolean().
+is_ip_address(Host) when is_list(Host) ->
+    % Strip brackets for IPv6 if present

Review Comment:
   We validate it here, but shouldn't we also parse it properly somewhere in 
`parse_entry`? I think TCP connect can only handle the `{_, _, ...}` tuple form.



##########
src/couch_replicator/src/couch_replicator_dns.erl:
##########
@@ -0,0 +1,206 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_dns).
+
+-include_lib("ibrowse/include/ibrowse.hrl").
+
+-export([
+    init/0,
+    apply_dns_override/2
+]).
+
+-ifdef(TEST).
+-export([
+    parse_config/1,
+    match_pattern/2,
+    get_overrides/0,
+    resolve_host/1,
+    is_ip_address/1
+]).
+-endif.
+
+-type dns_override() :: {binary(), binary()}.
+
+-define(DNS_OVERRIDES_KEY, {?MODULE, dns_overrides}).
+
+%% Initialize DNS overrides cache
+-spec init() -> ok.
+init() ->
+    Overrides =
+        case config:get("replicator", "dns_overrides", undefined) of
+            undefined -> [];
+            ConfigStr -> parse_config(ConfigStr)
+        end,
+    persistent_term:put(?DNS_OVERRIDES_KEY, Overrides),
+    ok.
+
+-spec resolve_host(string()) -> {string(), string() | undefined}.
+resolve_host(Host) ->
+    case find_override(list_to_binary(Host), get_overrides()) of
+        {ok, Target} ->
+            {binary_to_list(Target), Host};
+        not_found ->
+            {Host, undefined}
+    end.
+
+-spec get_overrides() -> [dns_override()].
+get_overrides() ->
+    try
+        persistent_term:get(?DNS_OVERRIDES_KEY, [])
+    catch
+        error:badarg ->
+            % not initialized yet, fall back to reading config
+            case config:get("replicator", "dns_overrides", undefined) of
+                undefined -> [];
+                ConfigStr -> parse_config(ConfigStr)
+            end
+    end.
+
+-spec parse_config(string()) -> [dns_override()].
+parse_config(ConfigStr) ->
+    ConfigBin = list_to_binary(ConfigStr),
+    Entries = binary:split(ConfigBin, <<",">>, [global, trim]),
+    lists:filtermap(fun parse_entry/1, Entries).
+
+% Note: IPv6 addresses in targets must be enclosed in brackets.
+% Format: pattern:target
+% Valid:   *.example.com:[2001:db8::1]
+% Invalid: [2001:db8::1]:proxy.internal (IPv6 as pattern not supported)
+parse_entry(<<>>) ->
+    false;
+parse_entry(Entry0) ->
+    Entry = string:trim(Entry0),
+    case binary:split(Entry, <<":">>) of
+        [Pattern0, Target0] ->
+            Pattern = string:trim(Pattern0),
+            Target = string:trim(Target0),
+            case {Pattern, Target} of
+                {<<>>, _} ->
+                    invalid_entry(Entry);
+                {_, <<>>} ->
+                    invalid_entry(Entry);
+                % Reject IPv6 addresses as patterns (they start with '[')
+                {<<"[", _/binary>>, _} ->
+                    invalid_entry_reason(Entry, "IPv6 addresses cannot be used 
as patterns");
+                _ ->
+                    {true, {Pattern, Target}}
+            end;
+        _ ->
+            invalid_entry(Entry)
+    end.
+
+invalid_entry(Entry) ->
+    couch_log:warning("Invalid dns_override entry: ~ts", [Entry]),
+    false.
+
+invalid_entry_reason(Entry, Reason) ->
+    couch_log:warning("Invalid dns_override entry: ~ts (~s)", [Entry, Reason]),
+    false.
+
+find_override(_Host, []) ->
+    not_found;
+find_override(Host, [{Pattern, Target} | Rest]) ->
+    case match_pattern(Host, Pattern) of
+        true ->
+            {ok, Target};
+        false ->
+            find_override(Host, Rest)
+    end.
+
+% DNS Override Pattern Matching
+%
+% Supports leading wildcard patterns only:
+%   - *.example.com matches any.subdomain.example.com
+%   - *.example.com does NOT match example.com (requires at least one 
subdomain)
+%
+% Not supported:
+%   - middle wildcards: sub.*.example.com
+%   - trailing wildcards: example.*
+%   - multiple wildcards: *.*.example.com
+-spec match_pattern(binary(), binary()) -> boolean().
+match_pattern(Host, Pattern) when is_binary(Host), is_binary(Pattern) ->
+    % DNS names are case-insensitive
+    HostLower = string:lowercase(Host),
+    PatternLower = string:lowercase(Pattern),
+    match_pattern_impl(HostLower, PatternLower).
+
+match_pattern_impl(Host, <<"*", Suffix/binary>>) ->
+    % wildcard match: extract last N bytes from Host and compare to Suffix
+    HostSize = byte_size(Host),
+    SuffixSize = byte_size(Suffix),
+    % ensure we have enough bytes before extracting suffix
+    case HostSize >= SuffixSize of
+        true ->
+            Pos = HostSize - SuffixSize,
+            binary:part(Host, Pos, SuffixSize) =:= Suffix;
+        false ->
+            false
+    end;
+match_pattern_impl(Host, Pattern) ->
+    Host =:= Pattern.
+
+-spec is_ip_address(string()) -> boolean().
+is_ip_address(Host) when is_list(Host) ->
+    % Strip brackets for IPv6 if present
+    HostStripped = string:trim(Host, both, "[]"),
+    case inet:parse_address(HostStripped) of
+        {ok, _} -> true;
+        _ -> false
+    end.
+
+%% Apply DNS override and SNI configuration to ibrowse options
+-spec apply_dns_override(string(), list()) -> list().
+apply_dns_override(Url, IbrowseOptions) ->
+    case ibrowse_lib:parse_url(Url) of
+        {error, _} ->
+            IbrowseOptions;
+        #url{host = Host, protocol = Protocol} ->
+            {TargetHost, OriginalHost} = resolve_host(Host),
+            apply_override_options(
+                IbrowseOptions,
+                Protocol,
+                TargetHost,
+                OriginalHost
+            )
+    end.
+
+%% Internal: Apply connect_to and SNI options
+-spec apply_override_options(list(), atom(), string(), string() | undefined) 
-> list().
+apply_override_options(Opts, _Protocol, _TargetHost, undefined) ->
+    % No override active
+    Opts;
+apply_override_options(Opts, Protocol, TargetHost, OriginalHost) ->
+    % Log DNS override
+    couch_log:debug(
+        "DNS override (~p): ~s -> ~s",
+        [Protocol, OriginalHost, TargetHost]
+    ),
+    couch_stats:increment_counter([couch_replicator, dns_overrides_applied]),
+    % Add connect_to option
+    Opts1 = [{connect_to, TargetHost} | Opts],
+    % Add SNI for HTTPS if OriginalHost is a hostname (not IP)
+    case {Protocol, is_ip_address(OriginalHost)} of
+        {https, false} ->
+            add_sni_option(Opts1, OriginalHost);
+        _ ->
+            Opts1

Review Comment:
   We fixed the issue so that we don't set SNI for IP addresses; however, 
upstream in ibrowse it is still set unconditionally in `ensure_sni(Opts, Host)`.
   
   Ideally we'd patch upstream ibrowse, but for now I think the SSL app lets us 
explicitly set SNI to `disable`.
   
   https://www.erlang.org/doc/apps/ssl/ssl.html#t:client_option_cert/0
   
   > Specify the hostname to be used in TLS Server Name Indication extension. 
If not specified it will default to the Host argument of 
[connect/3,4](https://www.erlang.org/doc/apps/ssl/ssl.html#connect/3) unless it 
is of type 
[inet:ip_address()](https://www.erlang.org/doc/apps/kernel/inet.html#t:ip_address/0).
 The hostname will also be used in the hostname verification of the peer 
certificate using 
[public_key:pkix_verify_hostname/2](https://www.erlang.org/doc/apps/public_key/public_key.html#pkix_verify_hostname/2).
 The special value disable prevents the Server Name Indication extension from 
being sent and disables the hostname verification check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to