This is an automated email from the ASF dual-hosted git repository.

knarendran pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git


The following commit(s) were added to refs/heads/main by this push:
     new 3516729  Add support for multiple VMSS on Azure (#394)
3516729 is described below

commit 3516729b303acefb2898e557fb6d4ab643a38074
Author: Karthick Narendran <[email protected]>
AuthorDate: Tue May 11 10:13:26 2021 +0100

    Add support for multiple VMSS on Azure (#394)
    
    Co-authored-by: Karthick Narendran <[email protected]>
---
 README.md                                          |   2 +
 ansible/azure_terminate.yml                        |  17 ++
 ansible/library/azure_host_role_map.py             |  92 +++++++++
 ...ure_rm_virtualmachinescaleset_nic_list_facts.py |   3 -
 .../roles/azure/tasks/assign_msi_multiple_vmss.yml |  40 ++++
 ansible/roles/azure/tasks/create_multiple_vmss.yml | 228 +++++++++++++++++++++
 ansible/roles/azure/tasks/main.yml                 |   7 +-
 conf/.gitignore                                    |   2 +
 conf/azure_multiple_vmss_vars.yml.example          |  94 +++++++++
 conf/muchos.props.example                          |   2 +
 docs/azure-multiple-vmss.md                        |  34 +++
 lib/muchos/azure.py                                | 136 ++++++++++++
 lib/muchos/config/azure.py                         |  16 ++
 13 files changed, 669 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 04c300c..390793b 100644
--- a/README.md
+++ b/README.md
@@ -168,6 +168,8 @@ Under the `azure` section, edit following values as per 
your configuration:
 * `vnet` to provide the name of the VNET that your cluster nodes should use. A 
new VNET with this name will be
   created if it doesn't already exist
 * `subnet` to provide a name for the subnet within which the cluster resources 
will be deployed
+* `use_multiple_vmss` allows you to configure VMs with different CPU, memory, 
disks for leaders and workers. To 
+  know more about this feature, please follow the 
[doc](docs/azure-multiple-vmss.md).   
 * `azure_image_reference` allows you to specify the CentOS image SKU in the 
format as shown below. To configure 
   CentOS 8.x, please follow [these steps](docs/azure-image-reference.md).
   ```bash
diff --git a/ansible/azure_terminate.yml b/ansible/azure_terminate.yml
index 2928924..b7edc43 100644
--- a/ansible/azure_terminate.yml
+++ b/ansible/azure_terminate.yml
@@ -76,6 +76,23 @@
         name: "{{ vmss_name }}"
         remove_on_absent: all
         state: absent
+      when: use_multiple_vmss is not defined or not use_multiple_vmss
+
+    - name: Include azure_multiple_vmss_vars.yml definition
+      include_vars:
+        file: "{{ deploy_path }}/conf/azure_multiple_vmss_vars.yml"
+        name: azure_multiple_vmss_vars
+      when: use_multiple_vmss
+
+    - name: Delete VM Scale Sets
+      azure_rm_virtualmachinescaleset:
+        resource_group: "{{ resource_group }}"
+        name: "{{ vmss_name }}-{{ item.name_suffix }}"
+        remove_on_absent: all
+        state: absent
+      with_items:
+        - "{{ azure_multiple_vmss_vars.vars_list }}"
+      when: use_multiple_vmss
 
     - name: Delete azure proxy virtual machine if one was created
       azure_rm_virtualmachine:
diff --git a/ansible/library/azure_host_role_map.py 
b/ansible/library/azure_host_role_map.py
new file mode 100644
index 0000000..4d52c2c
--- /dev/null
+++ b/ansible/library/azure_host_role_map.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from ansible.module_utils.basic import AnsibleModule
+from collections import defaultdict
+import os
+from os.path import join
+
+deploy_path = os.environ.get("MUCHOS_HOME")
+
+
+def label(hosts, labels):
+    hld = defaultdict(list)
+    for i, host in enumerate(hosts):
+        for tmpLabel, n in labels.items():
+            if i < n:
+                hld[host].append(tmpLabel)
+    return hld
+
+
+def stringify(L, hns):
+    # Flatten the list of dicts
+    labels = {k: v for d in L for k, v in d.items()}
+    label_string_dict = {k: ",".join(v) for k, v in labels.items()}
+    label_list = [
+        "{} = {} {}".format(k, v, hns[k]) for k, v in label_string_dict.items()
+    ]
+    return "\n".join(label_list)
+
+
+def main():
+
+    fields = {
+        "hosts": {"required": True, "type": "list"},
+        "vars_list": {"required": True, "type": "dict"},
+        "cluster_name": {"required": True, "type": "str"},
+    }
+
+    module = AnsibleModule(argument_spec=fields)
+    vars_list = module.params["vars_list"]
+    hosts = module.params["hosts"]
+    cluster_name = module.params["cluster_name"]
+    mp = {x["name_suffix"]: x["roles"] for x in vars_list["vars_list"]}
+    ns_mp = {
+        cluster_name + "-" + x["name_suffix"]: x.get("nameservice_id", "")
+        for x in vars_list["vars_list"]
+    }
+
+    hd = defaultdict(list)
+    for host in hosts:
+        hd[host["key"]].append(host["value"])
+
+    label_tuples = {
+        cluster_name + "-" + k: (hd[cluster_name + "-" + k], v)
+        for k, v in mp.items()
+    }
+
+    hdfs_ns_tuples = {h: ns_mp[k] for k, v in hd.items() for h in v}
+
+    label_lists = [label(*v) for k, v in label_tuples.items()]
+    result_string = str(stringify(label_lists, hdfs_ns_tuples))
+
+    vmss_file = open(join(deploy_path, "conf/azure_vmss_to_hosts.conf"), "w")
+    for key in hd:
+        vmss_file.write("[" + key.replace("-", "_") + "]\n")
+        for value in hd[key]:
+            vmss_file.write(value)
+            vmss_file.write("\n")
+        vmss_file.write("\n")
+    vmss_file.write("\n")
+    vmss_file.close()
+
+    module.exit_json(result=result_string)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py 
b/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
index 2244b5a..cc20ea7 100644
--- a/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
+++ b/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
@@ -52,9 +52,6 @@ options:
 extends_documentation_fragment:
     - azure
 
-author:
-    - "Min Pae (@sputnik13)"
-
 '''
 
 EXAMPLES = '''
diff --git a/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml 
b/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml
new file mode 100644
index 0000000..c11e210
--- /dev/null
+++ b/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml
@@ -0,0 +1,40 @@
+---
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# These Ansible tasks only run on the client machine where Muchos runs
+# They assign the user-assigned managed identity (MSI) to each of the
+# VM Scale Sets defined in azure_multiple_vmss_vars.yml, so that the
+# cluster nodes can authenticate to ADLS Gen2 storage
+#
+
+- name: Assign User assigned Identity to Multiple VMSS
+  azure_rm_resource:
+    resource_group: "{{ resource_group }}"
+    provider: Compute
+    resource_type: virtualMachineScaleSets
+    resource_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+    api_version: '2019-03-01'
+    body:
+     location: "{{ location }}"
+     identity:
+       type: UserAssigned
+       userAssignedIdentities: "{{ UserAssignedIdentityArr|join('') }}"
+  loop:
+     "{{ azure_multiple_vmss_vars.vars_list }}"
diff --git a/ansible/roles/azure/tasks/create_multiple_vmss.yml 
b/ansible/roles/azure/tasks/create_multiple_vmss.yml
new file mode 100644
index 0000000..3bac89b
--- /dev/null
+++ b/ansible/roles/azure/tasks/create_multiple_vmss.yml
@@ -0,0 +1,228 @@
+---
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# These Ansible tasks only run on the client machine where Muchos runs
+# At a high level, the various sections in this file do the following:
+# 1. Create (if not already existing): an Azure resource group, virtual 
network / subnet
+# 2. Optionally (if the user specified) create a VM and related resources to 
use as a proxy host
+# 3. Create the Azure VMSS to support the nodes for use with Muchos
+# 4. Automatically populate the hosts file and associated [nodes] section in 
muchos.props
+#
+# For 1 & 2 it uses create_common_resources.yml & create_optional_proxy.yml
+
+- name: Include azure_multiple_vmss_vars.yml
+  include_vars:
+    file: "{{ deploy_path }}/conf/azure_multiple_vmss_vars.yml"
+    name: azure_multiple_vmss_vars
+
+- name: Create Scale Set
+  azure_rm_virtualmachinescaleset:
+    resource_group: "{{ resource_group }}"
+    location: "{{ location }}"
+    name: "{{ vmss_name }}-{{ item.name_suffix }}"
+    vm_size: "{{ item.sku }}"
+    admin_username: "{{ admin_username }}"
+    ssh_password_enabled: false
+    ssh_public_keys:
+      - path: /home/{{ admin_username }}/.ssh/authorized_keys
+        key_data: "{{ lookup('file', '~/.ssh/id_rsa.pub') }}"
+    capacity: "{{ item.capacity }}"
+    single_placement_group: "{{ False if item.capacity > 100 else omit }}"
+    virtual_network_name: "{{ vnet }}"
+    subnet_name: "{{ subnet }}"
+    upgrade_policy: Manual
+    tier: Standard
+    managed_disk_type: "{{ osdisk_sku }}"
+    os_disk_caching: ReadWrite
+    enable_accelerated_networking: "{{ accnet_capable }}"
+    image:
+      offer: "{{ image_offer if image_offer else omit }}"
+      publisher: "{{ image_publisher if image_publisher else omit }}"
+      sku:  "{{ image_sku if image_sku else omit }}"
+      version:  "{{ image_version if image_version else omit }}"
+      id: "{{ image_id if image_id else omit }}"
+    data_disks: |
+      {%- set data_disks = [] -%}
+      {%- for lun in range(item.data_disk_count) -%}
+        {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb': 
item.data_disk_size_gb, 'managed_disk_type': item.disk_sku }) -%}
+      {%- endfor -%}
+      {{ data_disks }}
+  with_items:
+    - "{{ azure_multiple_vmss_vars.vars_list }}"
+  vars:
+  - image_offer: "{{ azure_image_reference.split('|')[0] }}"
+  - image_publisher: "{{ azure_image_reference.split('|')[1] }}"
+  - image_sku: "{{ azure_image_reference.split('|')[2] }}"
+  - image_version: "{{ azure_image_reference.split('|')[3] }}"
+  - image_id: "{{ azure_image_reference.split('|')[4] }}"
+  - accnet_capable: "{{ True if item.sku in accnet_capable_skus else False }}"
+  - osdisk_sku: "{{ 'Premium_LRS' if item.sku in premiumio_capable_skus else 
'Standard_LRS' }}"
+  register: _create_clusters
+  async: 600
+  poll: 0
+  tags: create_multiple_vmss
+
+- name: Wait
+  async_status:
+    jid: "{{ item.ansible_job_id }}"
+  register: _jobs
+  until: _jobs.finished
+  delay: 15
+  retries: 300
+  with_items: "{{ _create_clusters.results }}"
+
+- name: Get VMSS instances
+  azure_rm_virtualmachinescalesetinstance_info:
+    resource_group: "{{ resource_group }}"
+    vmss_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+  register: _vmss_instances
+  with_items:
+    - "{{ azure_multiple_vmss_vars.vars_list }}"
+  async: 600
+  poll: 0
+
+- name: Get VMSS nic list
+  azure_rm_virtualmachinescaleset_nic_list_facts:
+    resource_group: "{{ resource_group }}"
+    vmss_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+  register: _vmss_nic_list
+  with_items:
+    - "{{ azure_multiple_vmss_vars.vars_list }}"
+  async: 600
+  poll: 0
+
+- name: Wait for VMSS instance list operations
+  async_status:
+    jid: "{{ item.ansible_job_id }}"
+  register: vmss_instances
+  until: vmss_instances.finished
+  delay: 15
+  retries: 300
+  with_items: "{{ _vmss_instances.results }}"
+
+- name: Wait for NIC list operations
+  async_status:
+    jid: "{{ item.ansible_job_id }}"
+  register: vmss_nic_list
+  until: vmss_nic_list.finished
+  delay: 15
+  retries: 300
+  with_items: "{{ _vmss_nic_list.results }}"
+
+- name: Get VM hostname to IP mapping
+  set_fact:
+    hostname_ip_pairs: |
+      {%- set vmname_ips = [] -%}
+      {%- if azure_proxy_host is defined and azure_proxy_host -%}
+      {%- set _ = vmname_ips.append({'name': azure_proxy_host, 'ip': 
azure_proxy_public_ip.state.ip_address }) -%}
+      {%- endif -%}
+      {%- set vmid_names = {} -%}
+      {%- for vmss in vmss_instances.results -%}
+      {%- for instance in vmss.instances -%}
+      {%- set _ = vmid_names.__setitem__(instance.id, 
instance.name.replace('_','-')) -%}
+      {%- endfor -%}
+      {%- endfor -%}
+      {%- set vmid_ips = {} -%}
+      {%- for vmss in vmss_nic_list.results -%}
+      {%- for interface in vmss.networkinterfaces -%}
+      {%- if interface.virtualMachine is defined -%}
+      {%- set _ = vmid_ips.__setitem__(interface.virtualMachine.id, 
interface.ipConfigurations[0].privateIPAddress) -%}
+      {%- endif -%}
+      {%- endfor -%}
+      {%- endfor -%}
+      {%- for vmid in vmid_names -%}
+      {%- set _ = vmname_ips.append({'name': vmid_names[vmid], 'ip': 
vmid_ips[vmid]}) -%}
+      {%- endfor -%}
+      {{ vmname_ips }}
+
+- name: Ensures hosts sub-dir exists
+  file:
+     path: "{{ deploy_path }}/conf/hosts/"
+     state: directory
+     recurse: yes
+
+- name: Ensures host_vars sub-dir exists
+  file:
+     path: "{{ deploy_path }}/ansible/host_vars/"
+     state: directory
+     recurse: yes
+
+- name: Write hosts file
+  template:
+    src: hostname_ip_mappings.j2
+    dest: "{{ deploy_path }}/conf/hosts/{{ vmss_name }}"
+    mode: 0644
+
+- name: Get vmss to host map
+  set_fact:
+    vmss_host_pairs: |
+      {%- set vmss_host = [] -%}
+      {%- for vmss in vmss_instances.results -%}
+      {%- for instance in vmss.instances -%}
+      {%- set _ = vmss_host.append({'key': 
vmss.invocation.module_args.vmss_name, 'value': 
instance.name.replace('_','-')}) -%}
+      {%- endfor -%}
+      {%- endfor -%}
+      {{ vmss_host }}
+
+- name: Clear section
+  ini_file:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    section: "nodes"
+    state: absent
+
+- name: Recreate section
+  ini_file:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    section: "nodes"
+    option: "#host0"
+    value: "service"
+    state: present
+
+- name: add azure proxy host
+  lineinfile:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    line: "{{ azure_proxy_host }} = client"
+  when: azure_proxy_host is defined and azure_proxy_host and azure_proxy_host 
!= None
+
+- name: Get host-role assignments
+  azure_host_role_map:
+    hosts: "{{ vmss_host_pairs }}"
+    vars_list: "{{ azure_multiple_vmss_vars }}"
+    cluster_name: "{{ vmss_name }}"
+  register: assignments
+
+- name: Write role assignments to muchos.props
+  lineinfile:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    line: "{{ assignments.result }}"
+
+- name: Change proxy hostname to azure proxy host in muchos.props
+  lineinfile:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    regexp: '^proxy_hostname\s*=\s*'
+    line: "proxy_hostname = {{ azure_proxy_host }}"
+  when: azure_proxy_host is defined and azure_proxy_host and azure_proxy_host 
!= None
+
+- name: Change proxy hostname to first node in vmss in muchos.props
+  lineinfile:
+    path: "{{ deploy_path }}/conf/muchos.props"
+    regexp: '^proxy_hostname\s*=\s*'
+    line: "proxy_hostname = {{ vmss_host_pairs[0].value }}"
+  when: not (azure_proxy_host is defined and azure_proxy_host and 
azure_proxy_host != None)
+
+
diff --git a/ansible/roles/azure/tasks/main.yml 
b/ansible/roles/azure/tasks/main.yml
index d252833..0b5dab9 100644
--- a/ansible/roles/azure/tasks/main.yml
+++ b/ansible/roles/azure/tasks/main.yml
@@ -30,7 +30,12 @@
   when: use_adlsg2
 - import_tasks: create_optional_proxy.yml
 - import_tasks: create_vmss.yml
+  when: use_multiple_vmss is not defined or not use_multiple_vmss
 - import_tasks: assign_msi_single_vmss.yml
-  when: use_adlsg2
+  when: (use_multiple_vmss is not defined or not use_multiple_vmss) and 
use_adlsg2
+- import_tasks: create_multiple_vmss.yml
+  when: use_multiple_vmss
+- import_tasks: assign_msi_multiple_vmss.yml
+  when: use_multiple_vmss and use_adlsg2
 - import_tasks: create_log_analytics_ws.yml
   when: az_oms_integration_needed and (az_logs_id is not defined or (not 
az_logs_id) or az_logs_id == None)
diff --git a/conf/.gitignore b/conf/.gitignore
index 0e1a2fc..2181aa7 100644
--- a/conf/.gitignore
+++ b/conf/.gitignore
@@ -2,3 +2,5 @@
 /hosts/*
 /keys
 /user_data
+/azure_vmss_to_hosts.conf
+/azure_multiple_vmss_vars.yml
diff --git a/conf/azure_multiple_vmss_vars.yml.example 
b/conf/azure_multiple_vmss_vars.yml.example
new file mode 100644
index 0000000..71c94f4
--- /dev/null
+++ b/conf/azure_multiple_vmss_vars.yml.example
@@ -0,0 +1,94 @@
+---
+vars_list:
+# The below roles are required when HA is enabled (i.e. hdfs_ha = True)
+  - name_suffix: vmss1
+    sku: Standard_D4s_v3
+    perf_profile: azd8s
+    data_disk_count: 4
+    disk_sku: Premium_LRS
+    data_disk_size_gb: 512
+    capacity: 4
+    roles:
+      namenode: 1
+      resourcemanager: 1
+      accumulomaster: 1
+      zookeeper: 1
+      journalnode: 2
+      zkfc: 1
+      client: 4
+  - name_suffix: vmss2
+    sku: Standard_D4s_v3
+    perf_profile: perf-small
+    data_disk_count: 4
+    disk_sku: Standard_LRS
+    data_disk_size_gb: 512
+    capacity: 4
+    roles:
+      zookeeper: 2
+      metrics: 1
+      journalnode: 1
+      namenode: 1
+      zkfc: 1
+      accumulomaster: 1
+      resourcemanager: 1
+      client: 4
+  - name_suffix: vmss3
+    sku: Standard_D4s_v3
+    perf_profile: azd8s
+    data_disk_count: 8
+    disk_sku: Standard_LRS
+    data_disk_size_gb: 1024
+    capacity: 4
+    roles:
+      worker: 4
+    
+  # The below roles are required when HA is not enabled (i.e. hdfs_ha = False)
+  - name_suffix: vmss4
+    sku: Standard_D4s_v3
+    perf_profile: azd8s
+    data_disk_count: 4
+    disk_sku: Premium_LRS
+    data_disk_size_gb: 512
+    capacity: 3
+    roles:
+      namenode: 1
+      resourcemanager: 1
+      accumulomaster: 1
+      zookeeper: 1
+      client: 3
+  - name_suffix: vmss5
+    sku: Standard_D4s_v3
+    perf_profile: azd8s
+    data_disk_count: 4
+    disk_sku: Premium_LRS
+    data_disk_size_gb: 512
+    capacity: 1
+    roles:
+      metrics: 1
+      client: 1
+  - name_suffix: vmss6
+    sku: Standard_D8s_v3
+    perf_profile: azd8s
+    data_disk_count: 8
+    disk_sku: Standard_LRS
+    data_disk_size_gb: 1024
+    capacity: 3
+    roles:
+      worker: 3
+
+  # The below VMSS definition is provided just as a sample to show how we
+  # can define per-VMSS mount root and disk path and pattern definitions
+  # Using ephemeral storage like the one shown below should not be used for
+  # any case where data persistence is required
+  - name_suffix: vmss7
+    sku: Standard_L16s_v2
+    perf_profile: azd8s
+    azure_disk_device_path: /dev
+    azure_disk_device_pattern: nvme*n1
+    mount_root: /nvmedata
+    data_disk_count: 0
+    disk_sku: Standard_LRS
+    data_disk_size_gb: 1024
+    capacity: 4
+    roles:
+      worker: 4
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 54fcefb..4af9e70 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -114,6 +114,8 @@ vnet_cidr = 10.0.0.0/8
 subnet = subnet1
 # The CIDR prefix used for the single subnet within the virtual network.
 subnet_cidr = 10.1.0.0/16
+# Optional. If set to True, will create multiple VMSS based on 
+azure_multiple_vmss_vars.yml
+use_multiple_vmss = False
 # Azure image reference defined as a pipe-delimited string in the format 
offer|publisher|sku|version|
 # Please refer 'Launching an Azure cluster' section of the README before 
making changes 
 azure_image_reference = CentOS|OpenLogic|7.5|latest|
diff --git a/docs/azure-multiple-vmss.md b/docs/azure-multiple-vmss.md
new file mode 100644
index 0000000..082281f
--- /dev/null
+++ b/docs/azure-multiple-vmss.md
@@ -0,0 +1,34 @@
+# Azure based clusters using multiple Virtual Machine Scale Sets (VMSS)
+By default, Azure based deployments of Accumulo clusters provision a single 
[Virtual Machine Scale Set - 
VMSS](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/overview).
 A VMSS consists of a set of Virtual Machine instances, which are individually 
identified by their hostname and private IP address.
+
+## Challenges with a single VMSS deployment
+1. All VM instances in a single VMSS by default are of the same size (CPU, RAM 
and disks). This can be a constraint when provisioning larger clusters, wherein 
the user might require different resource sizes for leader nodes as compared to 
worker nodes.
+1. It may also be required to use different disk types (SSD / HDD / NVME) for 
different sets of nodes in the same Muchos cluster. This is not possible when 
using a single VMSS deployment.
+1. The `muchos launch` command automatically populates the `nodes` section in 
`muchos.props` with these hostnames and IP addresses based on the details of 
the VM instances in the VMSS. In the case of a single VMSS deployment, 
hard-coded assignment of a minimum (but sufficient) set of roles, to these 
nodes is done. As a result, deploying additional roles, such as Fluo, or Spark, 
is not possible unless the user manually edits the `muchos.props` file after 
the `muchos launch` command, and p [...]
+1. Also, in certain cases, it may be necessary to spawn multiple VMSS 
deployments, to overcome 
[limits](https://docs.microsoft.com/en-us/azure/azure-resource-manager/management/azure-subscription-service-limits#virtual-machine-scale-sets-limits)
 such as the maximum number of VMs in a single VMSS. For example, attempting to 
launch a 2000-node Azure cluster through Muchos would not work if deploying 
using a single VMSS, as the current limit for VMSS is 1000 VMs in a single VMSS.
+1. Finally, it may be required to assign different perf profiles to different 
sets of VMs in the cluster. For example, larger nodes will typically have 
larger JVM heap sizes / YARN memory configured as compared to smaller nodes.
+
+## Multiple VMSS deployment
+To address the above challenges, Muchos supports a "multiple VMSS" mode of 
installation for Azure clusters. To use this mode, the user needs to:
+1. Set `use_multiple_vmss = True` in `muchos.props`
+1. Create an appropriate `azure_multiple_vmss_vars.yml` file in the 
`fluo-muchos/conf` folder
+
+In such a case, the `muchos launch` command will create multiple VMSS 
deployments in parallel, and later assign roles to the VM instances within each 
VMSS, based on the specification in the `azure_multiple_vmss_vars.yml` file. 
Subsequently, `muchos setup` runs without any modifications.
+
+## Format of the azure_multiple_vmss_vars.yml file
+Muchos provides a [sample file](../conf/azure_multiple_vmss_vars.yml.example) 
which can be used as a template to customize. The YAML file is a list of VMSS 
specifications. The following fields can be specified for each VMSS:
+
+| Attribute | Required or optional? | Default value | Description |
+|-----------|------------------------|---------|-------------|
+| `name_suffix` | Required | - | The name of each VMSS is constructed by 
concatenating the Muchos cluster name with this string. As an example, if your 
Muchos cluster is called `test`, and this field has a value of `ldr`, then the 
VMSS is created with a name `test-ldr`|
+| `sku` | Required | - | A string identifier specifying the Azure VM size. 
Refer to the [Azure 
documentation](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series)
 to lookup these strings. An example VM size is `Standard_D32s_v3` for a 
32-vCPU 
[Dsv3](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series#dsv3-series)
 VM|
+| `perf_profile` | Required | - | A string identifying a corresponding 
performance profile configuration section in muchos.props which contains perf 
profile parameters |
+| `azure_disk_device_path`| Optional | If not specified, the corresponding 
`azure_disk_device_path` value from the `azure` section in 
[muchos.props](../conf/muchos.props.example) is used | This is a device path 
used to enumerate attached SCSI or NVME disks to use for persistent local 
storage |
+| `azure_disk_device_pattern`| Optional | If not specified, the corresponding 
`azure_disk_device_pattern` value from the `azure` section in 
[muchos.props](../conf/muchos.props.example) is used | This is a device name 
wildcard pattern used (internally) in conjunction with `azure_disk_device_path` 
to enumerate attached SCSI or NVME disks to use for persistent local storage |
+| `mount_root`| Optional | If not specified, the corresponding `mount_root` 
value from the `azure` section in [muchos.props](../conf/muchos.props.example) 
is used | This is the folder in the file system where the persistent disks are 
mounted |
+| `data_disk_count`| Required | - | An integer value which specifies the 
number of persistent (managed) data disks to be attached to each VM in the 
VMSS. It can be 0 in specific cases - see [notes on using ephemeral 
storage](./azure-ephemeral-disks.md) for details |
+| `disk_sku`| Required | - | Can be either Standard_LRS (for HDD) or 
Premium_LRS (for Premium SSD). At this time, we have not tested the use of 
Standard SSD or UltraSSD with Muchos |
+| `data_disk_size_gb`| Required | - | An integer value specifying the size of 
each persistent (managed) data disk in GiB |
+| `image_reference`| Optional | If not specified, the corresponding 
`azure_image_reference` value from the `azure` section in 
[muchos.props](../conf/muchos.props.example) is used | Azure image reference 
defined as a pipe-delimited string.
+| `capacity`| Required | - | An integer value specifying the number of VMs in 
this specific VMSS |
+| `roles`| Required | - | This is a dictionary (list of key-value pairs), each 
of which should be of the form `muchos_role_name`: `integer count`. See [sample 
file](../conf/azure_multiple_vmss_vars.yml.example) for examples. The `muchos 
launch` command for Azure clusters uses this list to assign roles to hosts in a 
sequential fashion. For example, if a given VMSS has 3 `zkfc` role members and 
2 `namenode` role members defined, host0 and host1 in the VMSS will be assigned 
both `zkfc` and  [...]
diff --git a/lib/muchos/azure.py b/lib/muchos/azure.py
index b7ae53e..a12bede 100644
--- a/lib/muchos/azure.py
+++ b/lib/muchos/azure.py
@@ -128,3 +128,139 @@ class VmssCluster(ExistingCluster):
         if v.lower() in ("false", "no"):
             return False
         return v
+
+    # For Azure clusters this method creates Ansible group variables which
+    # allow overriding the "global" host or play variables with group specific
+    # variables. Because Ansible group variables override host variables this
+    # is a very powerful feature to support per-group specialization of
+    # configuration. Currently this is used to define the following:
+    #
+    # 1. Variables for different perf profiles for different groups of hosts
+    #    This capability allows specifying different settings for clusters
+    #    which have heterogenous hardware - RAM especially
+    #
+    # 2. Different mount roots for different sets of hosts, with a fallback to
+    #    using the global mount_root defined in the Ansible hosts file
+    #
+    # 3. Different worker_data_dirs and default_data_dirs for specific groups
+    #    of hosts.
+    #
+    # 4. Different Azure disk path and disk name pattern for specific groups
+    #    of hosts.
+    def add_specialized_configs(self, hosts_file):
+        if self.config.use_multiple_vmss():
+            vmss_hosts = open(
+                path.join(
+                    self.config.deploy_path,
+                    "conf/azure_vmss_to_hosts.conf"
+                ),
+                "r",
+            )
+            print("\n", file=hosts_file)
+            for line in vmss_hosts:
+                print(line.rstrip("\n"), file=hosts_file)
+
+            for curr_vmss in self.config.azure_multiple_vmss_vars["vars_list"]:
+                vmss_group_name = (
+                    self.config.cluster_name + "-" + curr_vmss["name_suffix"]
+                )
+                profile = curr_vmss["perf_profile"]
+
+                with open(
+                    path.join(
+                        self.config.deploy_path,
+                        "ansible/group_vars/"
+                        + vmss_group_name.replace("-", "_"),
+                    ),
+                    "w",
+                ) as vmss_file:
+                    for (name, value) in self.config.items(profile):
+                        print("{0}: {1}".format(name, value), file=vmss_file)
+
+                    # use VMSS-specific mount root if one is defined or
+                    # the global mount root if there is no VMSS-specific value
+                    curr_mount_root = curr_vmss.get(
+                        "mount_root", self.config.mount_root()
+                    )
+
+                    # write the mount root out to the per-VMSS group vars
+                    print(
+                        "{0}: {1}".format("mount_root", curr_mount_root),
+                        file=vmss_file,
+                    )
+
+                    # also include per-VMSS worker_data_dirs
+                    curr_worker_dirs = self.config.data_dirs_internal(
+                        "worker",
+                        curr_vmss["data_disk_count"],
+                        curr_mount_root,
+                        curr_vmss["sku"],
+                    )
+
+                    print(
+                        "{0}: {1}".format(
+                            "worker_data_dirs", curr_worker_dirs,
+                        ),
+                        file=vmss_file,
+                    )
+
+                    # per-VMSS default_data_dirs
+                    curr_default_dirs = self.config.data_dirs_internal(
+                        "default",
+                        curr_vmss["data_disk_count"],
+                        curr_mount_root,
+                        curr_vmss["sku"],
+                    )
+
+                    print(
+                        "{0}: {1}".format(
+                            "default_data_dirs", curr_default_dirs,
+                        ),
+                        file=vmss_file,
+                    )
+
+                    # also write out per-VMSS disk path and pattern
+                    # using the global value from muchos.props as default
+                    # if the VMSS does not define a custom value
+                    print(
+                        "{0}: {1}".format(
+                            "azure_disk_device_path",
+                            curr_vmss.get(
+                                "azure_disk_device_path",
+                                self.config.azure_disk_device_path(),
+                            ),
+                        ),
+                        file=vmss_file,
+                    )
+
+                    print(
+                        "{0}: {1}".format(
+                            "azure_disk_device_pattern",
+                            curr_vmss.get(
+                                "azure_disk_device_pattern",
+                                self.config.azure_disk_device_pattern(),
+                            ),
+                        ),
+                        file=vmss_file,
+                    )
+
+                    # these nested loops are a tight (if slightly less
+                    # readable way) of creating the various directory ordinals
+                    for dirtype in ["default", "worker"]:
+                        for ordinal in range(3):
+                            print(
+                                "{0}: {1}".format(
+                                    "{0}dir_ordinal{1}".format(
+                                        dirtype, ordinal
+                                    ),
+                                    0
+                                    if len(
+                                        curr_default_dirs
+                                        if dirtype == "default"
+                                        else curr_worker_dirs
+                                    )
+                                    < ordinal + 1
+                                    else ordinal,
+                                ),
+                                file=vmss_file,
+                            )
diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py
index ddc3f89..4a77b02 100644
--- a/lib/muchos/config/azure.py
+++ b/lib/muchos/config/azure.py
@@ -19,6 +19,7 @@ from sys import exit
 from .base import BaseConfig
 from .decorators import ansible_host_var, is_valid, default
 from .validators import is_type, is_in
+from yaml import load, FullLoader
 
 
 class AzureDeployConfig(BaseConfig):
@@ -40,6 +41,15 @@ class AzureDeployConfig(BaseConfig):
             cluster_name,
         )
 
+        # load azure_multiple_vmss_vars.yml
+        if self.use_multiple_vmss():
+            with open(
+                "conf/azure_multiple_vmss_vars.yml"
+            ) as azure_multiple_vmss_vars_file:
+                self.azure_multiple_vmss_vars = load(
+                    azure_multiple_vmss_vars_file.read(), Loader=FullLoader
+                )
+
     def verify_config(self, action):
         self._verify_config(action)
 
@@ -189,3 +199,9 @@ class AzureDeployConfig(BaseConfig):
     @default(None)
     def instance_volumes_adls(self):
         return self.get("azure", "instance_volumes_adls")
+
+    @ansible_host_var
+    @default(False)
+    @is_valid(is_in([True, False]))
+    def use_multiple_vmss(self):
+        return self.getboolean("azure", "use_multiple_vmss")

Reply via email to