This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3830f05  ARROW-6917: [Archery][Release] Add support for JIRA curation, 
changelog generation and commit cherry-picking for maintenance releases
3830f05 is described below

commit 3830f05a2b6f354437b50e66f65a1578eebfc693
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Jul 15 20:45:27 2020 +0200

    ARROW-6917: [Archery][Release] Add support for JIRA curation, changelog 
generation and commit cherry-picking for maintenance releases
    
    Also aims to resolve https://issues.apache.org/jira/browse/ARROW-8456
    
    ~I'd like to cover it (at least partly) with tests, but now I'm using it to 
create the maintenance branch for 0.17.1.~
    
    It also supports (re-)generating changelogs based on the jira and commit 
curation of each release including PARQUET issues.
    
    Closes #7162 from kszucs/cherry-picking
    
    Authored-by: Krisztián Szűcs <[email protected]>
    Signed-off-by: Krisztián Szűcs <[email protected]>
---
 dev/archery/MANIFEST.in                            |   4 +
 dev/archery/archery/cli.py                         | 102 +++++
 dev/archery/archery/release.py                     | 421 +++++++++++++++++++++
 .../archery/templates/release_changelog.md.j2}     |  20 +-
 .../archery/templates/release_curation.txt.j2      |  41 ++
 dev/archery/archery/utils/report.py                |  64 ++++
 dev/archery/setup.py                               |   4 +-
 dev/release/00-prepare.sh                          |   4 +-
 dev/release/changelog.py                           | 247 ------------
 dev/release/post-03-website.sh                     |   1 +
 dev/release/rat_exclude_files.txt                  |   2 +
 dev/tasks/crossbow.py                              | 136 +------
 12 files changed, 651 insertions(+), 395 deletions(-)

diff --git a/dev/archery/MANIFEST.in b/dev/archery/MANIFEST.in
new file mode 100644
index 0000000..90fe034
--- /dev/null
+++ b/dev/archery/MANIFEST.in
@@ -0,0 +1,4 @@
+include ../../LICENSE.txt
+include ../../NOTICE.txt
+
+include archery/reports/*
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 7cb2ed2..490e628 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -22,6 +22,7 @@ import errno
 import json
 import logging
 import os
+import pathlib
 import sys
 
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
@@ -833,5 +834,106 @@ def docker_compose_images(obj):
         click.echo(' - {}'.format(image))
 
 
+@archery.group('release')
+@click.option("--src", metavar="<arrow_src>", default=None,
+              callback=validate_arrow_sources,
+              help="Specify Arrow source directory.")
+@click.option("--jira-cache", type=click.Path(), default=None,
+              help="File path to cache queried JIRA issues per version.")
+@click.pass_obj
+def release(obj, src, jira_cache):
+    """Release related commands."""
+    from .release import Jira, CachedJira
+
+    jira = Jira()
+    if jira_cache is not None:
+        jira = CachedJira(jira_cache, jira=jira)
+
+    obj['jira'] = jira
+    obj['repo'] = src.path
+
+
+@release.command('curate')
+@click.argument('version')
+@click.pass_obj
+def release_curate(obj, version):
+    """Release curation."""
+    from .release import Release
+
+    release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
+    curation = release.curate()
+
+    click.echo(curation.render('console'))
+
+
+@release.group('changelog')
+def release_changelog():
+    """Release changelog."""
+    pass
+
+
+@release_changelog.command('add')
+@click.argument('version')
+@click.pass_obj
+def release_changelog_add(obj, version):
+    """Prepend the changelog with the current release"""
+    from .release import Release
+
+    jira, repo = obj['jira'], obj['repo']
+
+    # just handle the current version
+    release = Release.from_jira(version, jira=jira, repo=repo)
+    if release.is_released:
+        raise ValueError('This version has been already released!')
+
+    changelog = release.changelog()
+    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+
+    current_content = changelog_path.read_text()
+    new_content = changelog.render('markdown') + current_content
+
+    changelog_path.write_text(new_content)
+    click.echo("CHANGELOG.md is updated!")
+
+
+@release_changelog.command('regenerate')
+@click.pass_obj
+def release_changelog_regenerate(obj):
+    """Regenerate the whole CHANGELOG.md file"""
+    from .release import Release
+
+    jira, repo = obj['jira'], obj['repo']
+    changelogs = []
+
+    for version in jira.arrow_versions():
+        if not version.released:
+            continue
+        release = Release.from_jira(version, jira=jira, repo=repo)
+        click.echo('Querying changelog for version: {}'.format(version))
+        changelogs.append(release.changelog())
+
+    click.echo('Rendering new CHANGELOG.md file...')
+    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+    with changelog_path.open('w') as fp:
+        for cl in changelogs:
+            fp.write(cl.render('markdown'))
+
+
+@release.command('cherry-pick')
+@click.pass_obj
+def release_cherry_pick(obj):
+    """Cherry pick commits."""
+    from .release import PatchRelease
+
+    release = obj['release']
+    if not isinstance(release, PatchRelease):
+        raise click.UsageError('Cherry-pick command only supported for patch '
+                               'releases')
+
+    commands = release.generate_update_branch_commands()
+    for cmd in commands:
+        click.echo(cmd)
+
+
 if __name__ == "__main__":
     archery(obj={})
diff --git a/dev/archery/archery/release.py b/dev/archery/archery/release.py
new file mode 100644
index 0000000..cdbf372
--- /dev/null
+++ b/dev/archery/archery/release.py
@@ -0,0 +1,421 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import defaultdict
+import functools
+import os
+import re
+import shelve
+import warnings
+
+from git import Repo
+from jira import JIRA
+from semver import VersionInfo as SemVer
+
+from .utils.source import ArrowSources
+from .utils.report import JinjaReport
+
+
+def cached_property(fn):
+    return property(functools.lru_cache(maxsize=1)(fn))
+
+
+class JiraVersion(SemVer):
+
+    __slots__ = SemVer.__slots__ + ('released', 'release_date')
+
+    def __init__(self, original_jira_version):
+        super().__init__(**SemVer.parse(original_jira_version.name).to_dict())
+        self.released = original_jira_version.released
+        self.release_date = getattr(original_jira_version, 'releaseDate', None)
+
+
+class JiraIssue:
+
+    def __init__(self, original_jira_issue):
+        self.key = original_jira_issue.key
+        self.type = original_jira_issue.fields.issuetype.name
+        self.summary = original_jira_issue.fields.summary
+
+    @property
+    def project(self):
+        return self.key.split('-')[0]
+
+    @property
+    def number(self):
+        return int(self.key.split('-')[1])
+
+
+class Jira(JIRA):
+
+    def __init__(self, user=None, password=None):
+        user = user or os.environ.get('APACHE_JIRA_USER')
+        password = password or os.environ.get('APACHE_JIRA_PASSWORD')
+        super().__init__(
+            {'server': 'https://issues.apache.org/jira'},
+            basic_auth=(user, password)
+        )
+
+    def arrow_version(self, version_string):
+        # query the version from jira to populate it with additional metadata
+        versions = self.arrow_versions()
+        # JiraVersion instances are comparable with strings
+        return versions[versions.index(version_string)]
+
+    def arrow_versions(self):
+        versions = []
+        for v in self.project_versions('ARROW'):
+            try:
+                versions.append(JiraVersion(v))
+            except ValueError:
+                # ignore invalid semantic versions like JS-0.4.0
+                continue
+        return sorted(versions, reverse=True)
+
+    def issue(self, key):
+        return JiraIssue(super().issue(key))
+
+    def arrow_issues(self, version):
+        query = "project=ARROW AND fixVersion={}".format(version)
+        issues = self.search_issues(query, maxResults=False)
+        return list(map(JiraIssue, issues))
+
+
+class CachedJira:
+
+    def __init__(self, cache_path, jira=None):
+        self.jira = jira or Jira()
+        self.cache_path = cache_path
+
+    def __getattr__(self, name):
+        attr = getattr(self.jira, name)
+        return self._cached(name, attr) if callable(attr) else attr
+
+    def _cached(self, name, method):
+        def wrapper(*args, **kwargs):
+            key = str((name, args, kwargs))
+            with shelve.open(self.cache_path) as cache:
+                try:
+                    result = cache[key]
+                except KeyError:
+                    cache[key] = result = method(*args, **kwargs)
+            return result
+        return wrapper
+
+
+_TITLE_REGEX = re.compile(
+    r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*"
+    r"(?P<components>\[.*\])?\s*(?P<summary>.*)"
+)
+_COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]")
+
+
+class CommitTitle:
+
+    def __init__(self, summary, project=None, issue=None, components=None):
+        self.project = project
+        self.issue = issue
+        self.components = components or []
+        self.summary = summary
+
+    def __str__(self):
+        out = ""
+        if self.issue:
+            out += "{}: ".format(self.issue)
+        if self.components:
+            for component in self.components:
+                out += "[{}]".format(component)
+            out += " "
+        out += self.summary
+        return out
+
+    @classmethod
+    def parse(cls, headline):
+        matches = _TITLE_REGEX.match(headline)
+        if matches is None:
+            warnings.warn(
+                "Unable to parse commit message `{}`".format(headline)
+            )
+            return CommitTitle(headline)
+
+        values = matches.groupdict()
+        components = values.get('components') or ''
+        components = _COMPONENT_REGEX.findall(components)
+
+        return CommitTitle(
+            values['summary'],
+            project=values.get('project'),
+            issue=values.get('issue'),
+            components=components
+        )
+
+
+class Commit:
+
+    def __init__(self, wrapped):
+        self._title = CommitTitle.parse(wrapped.summary)
+        self._wrapped = wrapped
+
+    def __getattr__(self, attr):
+        if hasattr(self._title, attr):
+            return getattr(self._title, attr)
+        else:
+            return getattr(self._wrapped, attr)
+
+    def __repr__(self):
+        template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>'
+        return template.format(self.hexsha, self.issue, self.components,
+                               self.summary)
+
+    @property
+    def url(self):
+        return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha)
+
+    @property
+    def title(self):
+        return self._title
+
+
+class ReleaseCuration(JinjaReport):
+    templates = {
+        'console': 'release_curation.txt.j2'
+    }
+    fields = [
+        'release',
+        'within',
+        'outside',
+        'nojira',
+        'parquet',
+        'nopatch'
+    ]
+
+
+class JiraChangelog(JinjaReport):
+    templates = {
+        'markdown': 'release_changelog.md.j2',
+        'html': 'release_changelog.html.j2'
+    }
+    fields = [
+        'release',
+        'categories'
+    ]
+
+
+class Release:
+
+    def __init__(self):
+        raise TypeError("Do not initialize Release class directly, use "
+                        "Release.from_jira(version) instead.")
+
+    def __repr__(self):
+        if self.version.released:
+            status = "released_at={!r}".format(self.version.release_date)
+        else:
+            status = "pending"
+        return "<{} {!r} {}>".format(self.__class__.__name__,
+                                     str(self.version), status)
+
+    @classmethod
+    def from_jira(cls, version, jira=None, repo=None):
+        jira = jira or Jira()
+
+        if repo is None:
+            arrow = ArrowSources.find()
+            repo = Repo(arrow.path)
+        else:
+            repo = Repo(repo)
+
+        if isinstance(version, str):
+            version = jira.arrow_version(version)
+        elif not isinstance(version, JiraVersion):
+            raise TypeError(version)
+
+        # decide the type of the release based on the version number
+        klass = Release if version.patch == 0 else PatchRelease
+
+        # prevent instantiating release object directly
+        obj = klass.__new__(klass)
+        obj.version = version
+        obj.jira = jira
+        obj.repo = repo
+
+        return obj
+
+    @property
+    def is_released(self):
+        return self.version.released
+
+    @property
+    def tag(self):
+        return "apache-arrow-{}".format(str(self.version))
+
+    @property
+    def branch(self):
+        # TODO(kszucs): add apache remote
+        return "master"
+
+    @cached_property
+    def previous(self):
+        # select all non-patch releases
+        versions = [v for v in self.jira.arrow_versions() if v.patch == 0]
+        position = versions.index(self.version) + 1
+        if position == len(versions):
+            # first release doesn't have a previous one
+            return None
+        previous = versions[position]
+        return Release.from_jira(previous)
+
+    @cached_property
+    def issues(self):
+        return {i.key: i for i in self.jira.arrow_issues(self.version)}
+
+    @cached_property
+    def commits(self):
+        """
+        All commits applied between two versions on the master branch.
+        """
+        if self.previous is None:
+            # first release
+            lower = ''
+        else:
+            lower = self.repo.tags[self.previous.tag]
+
+        if self.version.released:
+            upper = self.repo.tags[self.tag]
+        else:
+            try:
+                upper = self.repo.branches[self.branch]
+            except IndexError:
+                warnings.warn("Release branch `{}` doesn't exist."
+                              .format(self.branch))
+                return []
+
+        commit_range = "{}..{}".format(lower, upper)
+        return list(map(Commit, self.repo.iter_commits(commit_range)))
+
+    def curate(self):
+        # handle commits with parquet issue key specially and query them from
+        # jira and add it to the issues
+        release_issues = self.issues
+
+        within, outside, nojira, parquet = [], [], [], []
+        for c in self.commits:
+            if c.issue is None:
+                nojira.append(c)
+            elif c.issue in release_issues:
+                within.append((release_issues[c.issue], c))
+            elif c.project == 'PARQUET':
+                parquet.append((self.jira.issue(c.issue), c))
+            else:
+                outside.append((self.jira.issue(c.issue), c))
+
+        # remaining jira tickets
+        within_keys = {i.key for i, c in within}
+        nopatch = [issue for key, issue in release_issues.items()
+                   if key not in within_keys]
+
+        return ReleaseCuration(release=self, within=within, outside=outside,
+                               nojira=nojira, parquet=parquet, nopatch=nopatch)
+
+    def changelog(self):
+        release_issues = []
+
+        # get organized report for the release
+        curation = self.curate()
+
+        # jira tickets having patches in the release
+        for issue, _ in curation.within:
+            release_issues.append(issue)
+
+        # jira tickets without patches
+        for issue in curation.nopatch:
+            release_issues.append(issue)
+
+        # parquet patches in the release
+        for issue, _ in curation.parquet:
+            release_issues.append(issue)
+
+        # organize issues into categories
+        issue_types = {
+            'Bug': 'Bug Fixes',
+            'Improvement': 'New Features and Improvements',
+            'New Feature': 'New Features and Improvements',
+            'Sub-task': 'New Features and Improvements',
+            'Task': 'New Features and Improvements',
+            'Test': 'Bug Fixes',
+            'Wish': 'New Features and Improvements',
+        }
+        categories = defaultdict(list)
+        for issue in release_issues:
+            categories[issue_types[issue.type]].append(issue)
+
+        # sort issues by the issue key in ascending order
+        for name, issues in categories.items():
+            issues.sort(key=lambda issue: (issue.project, issue.number))
+
+        return JiraChangelog(release=self, categories=categories)
+
+
+class PatchRelease(Release):
+
+    @property
+    def branch(self):
+        # TODO(kszucs): add apache remote
+        return "maint-{}.{}.x".format(self.version.major, self.version.minor)
+
+    @cached_property
+    def previous(self):
+        # select all releases under this minor
+        versions = [v for v in self.jira.arrow_versions()
+                    if v.minor == self.version.minor]
+        previous = versions[versions.index(self.version) + 1]
+        return Release.from_jira(previous)
+
+    def generate_update_branch_commands(self):
+        # cherry pick not yet cherry picked commits on top of the maintenance
+        # branch
+        try:
+            target = self.repo.branches[self.branch]
+        except IndexError:
+            # maintenance branch doesn't exist yet, so create one based off of
+            # the previous git tag
+            target = self.repo.create_head(self.branch, self.previous.tag)
+
+        # collect commits applied on master since the root of the maintenance
+        # branch (the minor release of this patch release)
+        commit_range = "apache-arrow-{}.{}.0..master".format(
+            self.version.major, self.version.minor
+        )
+        commits = list(map(Commit, self.repo.iter_commits(commit_range)))
+
+        # iterate over commits applied on master and keep the original order of
+        # the commits to minimize the merge conflicts during cherry-picks
+        patch_commits = [c for c in commits if c.issue in self.issues]
+
+        commands = [
+            'git checkout -b {} {}'.format(target, self.previous.tag)
+        ]
+        for c in reversed(patch_commits):
+            commands.append(
+                'git cherry-pick {}  # {}'.format(c.hexsha, c.title)
+            )
+
+        return commands
+
+    # TODO(kszucs): update_branch method which tries to cherry pick to a
+    # temporary branch and if the patches apply cleanly then update the maint
+    # reference
diff --git a/dev/release/update-changelog.sh b/dev/archery/archery/templates/release_changelog.md.j2
old mode 100755
new mode 100644
similarity index 71%
rename from dev/release/update-changelog.sh
rename to dev/archery/archery/templates/release_changelog.md.j2
index 4fda016..c0406dd
--- a/dev/release/update-changelog.sh
+++ b/dev/archery/archery/templates/release_changelog.md.j2
@@ -1,5 +1,4 @@
-#!/bin/bash
-#
+{#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,16 +15,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#
-set -e
-
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+#}
+# Apache Arrow {{ release.version }} ({{ release.version.release_date or today() }})
 
-version=$1
+{% for category, issues in categories.items() -%}
 
-CHANGELOG=$SOURCE_DIR/../../CHANGELOG.md
+## {{ category }}
 
-${PYTHON:-python} $SOURCE_DIR/changelog.py $version 0 $CHANGELOG
+{% for issue in issues -%}
+* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ issue.summary | md }}
+{% endfor %}
 
-git add $CHANGELOG
-git commit -m "[Release] Update CHANGELOG.md for $version"
+{% endfor %}
diff --git a/dev/archery/archery/templates/release_curation.txt.j2 b/dev/archery/archery/templates/release_curation.txt.j2
new file mode 100644
index 0000000..a5d11e9
--- /dev/null
+++ b/dev/archery/archery/templates/release_curation.txt.j2
@@ -0,0 +1,41 @@
+{#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#}
+Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }}
+
+Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }}
+
+Patches with assigned issue in version {{ release.version }}:
+{% for issue, commit in within -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches with assigned issue outside of version {{ release.version }}:
+{% for issue, commit in outside -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches in version {{ release.version }} without a linked issue:
+{% for commit in nojira -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+JIRA issues in version {{ release.version }} without a linked patch:
+{% for issue in nopatch -%}
+ - https://issues.apache.org/jira/browse/{{ issue.key }}
+{% endfor %}
diff --git a/dev/archery/archery/utils/report.py b/dev/archery/archery/utils/report.py
new file mode 100644
index 0000000..6c7587d
--- /dev/null
+++ b/dev/archery/archery/utils/report.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABCMeta, abstractmethod
+import datetime
+
+import jinja2
+
+
+def markdown_escape(s):
+    for char in ('*', '#', '_', '~', '`', '>'):
+        s = s.replace(char, '\\' + char)
+    return s
+
+
+class Report(metaclass=ABCMeta):
+
+    def __init__(self, **kwargs):
+        for field in self.fields:
+            if field not in kwargs:
+                raise ValueError('Missing keyword argument {}'.format(field))
+        self._data = kwargs
+
+    def __getattr__(self, key):
+        return self._data[key]
+
+    @abstractmethod
+    def fields(self):
+        pass
+
+    @property
+    @abstractmethod
+    def templates(self):
+        pass
+
+
+class JinjaReport(Report):
+
+    def __init__(self, **kwargs):
+        self.env = jinja2.Environment(
+            loader=jinja2.PackageLoader('archery', 'templates')
+        )
+        self.env.filters['md'] = markdown_escape
+        self.env.globals['today'] = datetime.date.today
+        super().__init__(**kwargs)
+
+    def render(self, template_name):
+        template_path = self.templates[template_name]
+        template = self.env.get_template(template_path)
+        return template.render(**self._data)
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index dd1b555..8823ace 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -27,7 +27,8 @@ if sys.version_info < (3, 5):
 extras = {
     'benchmark': ['pandas'],
     'bot': ['ruamel.yaml', 'pygithub'],
-    'docker': ['ruamel.yaml', 'python-dotenv']
+    'docker': ['ruamel.yaml', 'python-dotenv'],
+    'release': ['jinja2', 'jira', 'semver', 'gitpython']
 }
 extras['all'] = list(set(functools.reduce(operator.add, extras.values())))
 
@@ -45,6 +46,7 @@ setup(
         'archery.lang',
         'archery.utils'
     ],
+    include_package_data=True,
     install_requires=['click>=7'],
     tests_require=['pytest', 'responses'],
     extras_require=extras,
diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh
index 2e357f0..7b054e9 100755
--- a/dev/release/00-prepare.sh
+++ b/dev/release/00-prepare.sh
@@ -195,7 +195,9 @@ tag=apache-arrow-${version}
 if [ ${PREPARE_CHANGELOG} -gt 0 ]; then
   echo "Updating changelog for $version"
   # Update changelog
-  $SOURCE_DIR/update-changelog.sh $version
+  archery release changelog add $version
+  git add ${SOURCE_DIR}/../../CHANGELOG.md
+  git commit -m "[Release] Update CHANGELOG.md for $version"
 fi
 
 if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
deleted file mode 100755
index 4e85e46..0000000
--- a/dev/release/changelog.py
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Utility for generating changelogs for fix versions
-# requirements: pip install jira
-# Set $JIRA_USERNAME, $JIRA_PASSWORD environment variables
-
-from __future__ import print_function
-
-from collections import defaultdict
-from datetime import datetime
-from io import StringIO
-import locale
-import os
-import re
-import sys
-
-import jira.client
-
-# ASF JIRA username
-JIRA_USERNAME = os.environ["APACHE_JIRA_USERNAME"]
-# ASF JIRA password
-JIRA_PASSWORD = os.environ["APACHE_JIRA_PASSWORD"]
-
-JIRA_API_BASE = "https://issues.apache.org/jira"
-
-asf_jira = jira.client.JIRA(options={'server': JIRA_API_BASE},
-                            basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
-
-
-locale.setlocale(locale.LC_ALL, 'en_US.utf-8')
-
-
-release_dir = os.path.realpath(os.path.dirname(__file__))
-ARROW_ROOT_DEFAULT = os.path.join(release_dir, '..', '..')
-ARROW_ROOT = os.environ.get("ARROW_ROOT", ARROW_ROOT_DEFAULT)
-
-
-def get_issues_for_version(version):
-    jql = ("project=ARROW "
-           "AND fixVersion='{0}' "
-           "AND status = Resolved "
-           "AND resolution in (Fixed, Done) "
-           "ORDER BY issuetype DESC").format(version)
-
-    return asf_jira.search_issues(jql, maxResults=9999)
-
-
-def get_last_major_version(current_version):
-    # TODO: This doesn't work for generating a changelog for the _first_ major
-    # release, but we probably don't care
-    major_versions = [
-        v for v in asf_jira.project('ARROW').versions
-        if v.name[0].isdigit() and v.name.split('.')[-1] == '0'
-    ]
-
-    # Sort the versions
-    def sort_version(x):
-        major, minor, patch = x.name.split('.')
-        return int(major), int(minor)
-
-    major_versions.sort(key=sort_version)
-
-    # Find index of version being released
-    current_version_index = ([x.name for x in major_versions]
-                             .index(current_version))
-
-    return major_versions[current_version_index - 1]
-
-
-def get_jiras_from_git_changelog(current_version):
-    # We use this to get the resolved PARQUET JIRAs
-    from subprocess import check_output
-
-    last_major_version = get_last_major_version(current_version)
-
-    # Path to .git directory
-    git_dir = os.path.join(ARROW_ROOT, '.git')
-
-    cmd = ['git', '--git-dir', git_dir, 'log', '--pretty=format:%s',
-           'apache-arrow-{}..apache-arrow-{}'.format(last_major_version,
-                                                     current_version)]
-    output = check_output(cmd).decode('utf-8')
-
-    resolved_jiras = []
-    regex = re.compile(r'[a-zA-Z]+-[0-9]+')
-    for desc in output.splitlines():
-        maybe_jira = desc.split(':')[0]
-
-        # Sometimes people forget the colon
-        maybe_jira = maybe_jira.split(' ')[0]
-        if regex.match(maybe_jira):
-            resolved_jiras.append(maybe_jira)
-
-    return resolved_jiras
-
-
-LINK_TEMPLATE = '[{0}](https://issues.apache.org/jira/browse/{0})'
-
-
-def format_changelog_markdown(issues, out):
-    issues_by_type = defaultdict(list)
-    for issue in issues:
-        issues_by_type[issue.fields.issuetype.name].append(issue)
-
-    for issue_type, issue_group in sorted(issues_by_type.items()):
-        issue_group.sort(key=lambda x: x.key)
-
-        out.write('## {0}\n\n'.format(_escape_for_markdown(issue_type)))
-        for issue in issue_group:
-            markdown_summary = _escape_for_markdown(issue.fields.summary)
-            out.write('* {0} - {1}\n'.format(issue.key,
-                                             markdown_summary))
-        out.write('\n')
-
-
-def _escape_for_markdown(x):
-    return (
-        x.replace('_', r'\_')  # underscores
-        .replace('`', r'\`')   # backticks
-        .replace('*', r'\*')   # asterisks
-    )
-
-
-def format_changelog_website(issues, out):
-    NEW_FEATURE = 'New Features and Improvements'
-    BUGFIX = 'Bug Fixes'
-
-    CATEGORIES = {
-        'New Feature': NEW_FEATURE,
-        'Improvement': NEW_FEATURE,
-        'Wish': NEW_FEATURE,
-        'Task': NEW_FEATURE,
-        'Test': BUGFIX,
-        'Bug': BUGFIX,
-        'Sub-task': NEW_FEATURE
-    }
-
-    issues_by_category = defaultdict(list)
-    for issue in issues:
-        issue_type = issue.fields.issuetype.name
-        website_category = CATEGORIES[issue_type]
-        issues_by_category[website_category].append(issue)
-
-    WEBSITE_ORDER = [NEW_FEATURE, BUGFIX]
-
-    for issue_category in WEBSITE_ORDER:
-        issue_group = issues_by_category[issue_category]
-        issue_group.sort(key=lambda x: x.key)
-
-        out.write('## {0}\n\n'.format(issue_category))
-        for issue in issue_group:
-            name = LINK_TEMPLATE.format(issue.key)
-            markdown_summary = _escape_for_markdown(issue.fields.summary)
-            out.write('* {0} - {1}\n'
-                      .format(name, markdown_summary))
-        out.write('\n')
-
-
-def get_resolved_parquet_issues(version):
-    git_resolved_jiras = set(get_jiras_from_git_changelog(version))
-
-    # We don't assume that resolved Parquet issues are found in a single Fix
-    # Version, so for now we query them all and then select only the ones that
-    # are found in the git log
-    jql = ("project=PARQUET "
-           "AND component='parquet-cpp' "
-           "AND status = Resolved "
-           "AND resolution in (Fixed, Done) "
-           "ORDER BY issuetype DESC")
-
-    all_issues = asf_jira.search_issues(jql, maxResults=9999)
-    return [issue for issue in all_issues if issue.key in git_resolved_jiras]
-
-
-def get_changelog(version, for_website=False):
-    issues_for_version = get_issues_for_version(version)
-
-    # Infer resolved Parquet issues, since these can only really be known by
-    # looking at the git log
-    parquet_issues = get_resolved_parquet_issues(version)
-    issues_for_version.extend(parquet_issues)
-
-    buf = StringIO()
-
-    if for_website:
-        format_changelog_website(issues_for_version, buf)
-    else:
-        format_changelog_markdown(issues_for_version, buf)
-
-    return buf.getvalue()
-
-
-def append_changelog(version, changelog_path):
-    new_changelog = get_changelog(version)
-
-    with open(changelog_path, 'r') as f:
-        old_changelog = f.readlines()
-
-    result = StringIO()
-    # Header
-    print(''.join(old_changelog[:18]), file=result)
-
-    # New version
-    today = datetime.today().strftime('%d %B %Y')
-    print('# Apache Arrow {0} ({1})'.format(version, today),
-          end='', file=result)
-    print('\n', file=result)
-    print(new_changelog, end='', file=result)
-
-    # Prior versions
-    print(''.join(old_changelog[19:]), file=result)
-
-    with open(changelog_path, 'w') as f:
-        f.write(result.getvalue().rstrip() + '\n')
-
-
-if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        print('Usage: changelog.py $FIX_VERSION [$IS_WEBSITE] '
-              '[$CHANGELOG_TO_UPDATE]')
-
-    for_website = len(sys.argv) > 2 and sys.argv[2] == '1'
-
-    version = sys.argv[1]
-
-    if len(sys.argv) > 3:
-        changelog_path = sys.argv[3]
-        append_changelog(version, changelog_path)
-    else:
-        print(get_changelog(version, for_website=for_website))
diff --git a/dev/release/post-03-website.sh b/dev/release/post-03-website.sh
index 58121f5..08e5f3a 100755
--- a/dev/release/post-03-website.sh
+++ b/dev/release/post-03-website.sh
@@ -140,6 +140,7 @@ cat <<ANNOUNCE >> "${announce_file}"
 
 ANNOUNCE
 
+# TODO(kszucs): needs to update for the new changelog generation with archery
 ${PYTHON:-python} "${SOURCE_DIR}/changelog.py" ${version} 1 | \
   sed -e 's/^#/##/g' >> "${announce_file}"
 
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 158790d..25fff19 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -10,6 +10,7 @@
 .github/ISSUE_TEMPLATE/question.md
 ci/etc/rprofile
 ci/etc/*.patch
+CHANGELOG.md
 cpp/CHANGELOG_PARQUET.md
 cpp/src/arrow/io/mman.h
 cpp/src/arrow/util/random.h
@@ -41,6 +42,7 @@ cpp/src/plasma/thirdparty/dlmalloc.c
 cpp/thirdparty/flatbuffers/include/flatbuffers/base.h
 cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h
 cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h
+dev/archery/MANIFEST.in
 dev/archery/requirements*.txt
 dev/archery/archery/tests/fixtures/*
 dev/release/rat_exclude_files.txt
diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py
index adabe66..cb14de6 100755
--- a/dev/tasks/crossbow.py
+++ b/dev/tasks/crossbow.py
@@ -29,7 +29,7 @@ import textwrap
 from io import StringIO
 from pathlib import Path
 from textwrap import dedent
-from datetime import datetime, date
+from datetime import date
 from functools import partial
 
 import click
@@ -66,19 +66,6 @@ requests_log.propagate = True
 CWD = Path(__file__).parent.absolute()
 
 
-NEW_FEATURE = 'New Features and Improvements'
-BUGFIX = 'Bug Fixes'
-
-
-def md(template, *args, **kwargs):
-    """Wraps string.format with naive markdown escaping"""
-    def escape(s):
-        for char in ('*', '#', '_', '~', '`', '>'):
-            s = s.replace(char, '\\' + char)
-        return s
-    return template.format(*map(escape, args), **toolz.valmap(escape, kwargs))
-
-
 def unflatten(mapping):
     """Converts a flat tuple => object mapping to hierarchical one"""
     result = {}
@@ -162,95 +149,6 @@ _default_tree = {
 }
 
 
-class JiraChangelog:
-
-    def __init__(self, version, username, password,
-                 server='https://issues.apache.org/jira'):
-        import jira.client
-        self.server = server
-        # clean version to the first numbers
-        self.version = '.'.join(version.split('.')[:3])
-        query = ("project=ARROW "
-                 "AND fixVersion='{0}' "
-                 "AND status = Resolved "
-                 "AND resolution in (Fixed, Done) "
-                 "ORDER BY issuetype DESC").format(self.version)
-        self.client = jira.client.JIRA({'server': server},
-                                       basic_auth=(username, password))
-        self.issues = self.client.search_issues(query, maxResults=9999)
-
-    def format_markdown(self):
-        out = StringIO()
-
-        issues_by_type = toolz.groupby(lambda i: i.fields.issuetype.name,
-                                       self.issues)
-        for typename, issues in sorted(issues_by_type.items()):
-            issues.sort(key=lambda x: x.key)
-
-            out.write(md('## {}\n\n', typename))
-            for issue in issues:
-                out.write(md('* {} - {}\n', issue.key, issue.fields.summary))
-            out.write('\n')
-
-        return out.getvalue()
-
-    def format_website(self):
-        # jira category => website category mapping
-        categories = {
-            'New Feature': 'feature',
-            'Improvement': 'feature',
-            'Wish': 'feature',
-            'Task': 'feature',
-            'Test': 'bug',
-            'Bug': 'bug',
-            'Sub-task': 'feature'
-        }
-        titles = {
-            'feature': 'New Features and Improvements',
-            'bugfix': 'Bug Fixes'
-        }
-
-        issues_by_category = toolz.groupby(
-            lambda issue: categories[issue.fields.issuetype.name],
-            self.issues
-        )
-
-        out = StringIO()
-
-        for category in ('feature', 'bug'):
-            title = titles[category]
-            issues = issues_by_category[category]
-            issues.sort(key=lambda x: x.key)
-
-            out.write(md('## {}\n\n', title))
-            for issue in issues:
-                link = md('[{0}]({1}/browse/{0})', issue.key, self.server)
-                out.write(md('* {} - {}\n', link, issue.fields.summary))
-            out.write('\n')
-
-        return out.getvalue()
-
-    def render(self, old_changelog, website=False):
-        old_changelog = old_changelog.splitlines()
-        if website:
-            new_changelog = self.format_website()
-        else:
-            new_changelog = self.format_markdown()
-
-        out = StringIO()
-
-        # Apache license header
-        out.write('\n'.join(old_changelog[:18]))
-
-        # Newly generated changelog
-        today = datetime.today().strftime('%d %B %Y')
-        out.write(md('\n\n# Apache Arrow {} ({})\n\n', self.version, today))
-        out.write(new_changelog)
-        out.write('\n'.join(old_changelog[19:]))
-
-        return out.getvalue().strip()
-
-
 class GitRemoteCallbacks(PygitRemoteCallbacks):
 
     def __init__(self, token):
@@ -1489,38 +1387,6 @@ def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
 
 
 @crossbow.command()
-@click.option('--changelog-path', '-c', type=click.Path(exists=True),
-              default=str(DEFAULT_ARROW_PATH / 'CHANGELOG.md'),
-              help='Path of changelog to update')
-@click.option('--arrow-version', '-v', default=None,
-              help='Set target version explicitly')
-@click.option('--is-website', '-w', default=False, is_flag=True,
-              help='Whether to use website format for changelog. ')
-@click.option('--jira-username', '-u', default=None, help='JIRA username')
-@click.option('--jira-password', '-P', default=None, help='JIRA password')
-@click.option('--dry-run/--write', default=False,
-              help='Just display the new changelog, don\'t write it')
-@click.pass_obj
-def changelog(obj, changelog_path, arrow_version, is_website, jira_username,
-              jira_password, dry_run):
-    changelog_path = Path(changelog_path)
-    target = Target.from_repo(obj['arrow'])
-    version = arrow_version or target.version
-
-    changelog = JiraChangelog(version, username=jira_username,
-                              password=jira_password)
-    new_content = changelog.render(changelog_path.read_text(),
-                                   website=is_website)
-
-    if dry_run:
-        click.echo(new_content)
-    else:
-        changelog_path.write_text(new_content)
-        click.echo('New changelog successfully generated, see git diff for the'
-                   'changes')
-
-
-@crossbow.command()
 @click.option('--config-path', '-c',
               type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
               help='Task configuration yml. Defaults to tasks.yml')

Reply via email to