This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3830f05 ARROW-6917: [Archery][Release] Add support for JIRA curation,
changelog generation and commit cherry-picking for maintenance releases
3830f05 is described below
commit 3830f05a2b6f354437b50e66f65a1578eebfc693
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Jul 15 20:45:27 2020 +0200
ARROW-6917: [Archery][Release] Add support for JIRA curation, changelog
generation and commit cherry-picking for maintenance releases
Also aims to resolve https://issues.apache.org/jira/browse/ARROW-8456
~I'd like to cover it (at least partly) with tests, but now I'm using it to
create the maintenance branch for 0.17.1.~
It also supports (re-)generating changelogs based on the jira and commit
curation of each release including PARQUET issues.
Closes #7162 from kszucs/cherry-picking
Authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Krisztián Szűcs <[email protected]>
---
dev/archery/MANIFEST.in | 4 +
dev/archery/archery/cli.py | 102 +++++
dev/archery/archery/release.py | 421 +++++++++++++++++++++
.../archery/templates/release_changelog.md.j2} | 20 +-
.../archery/templates/release_curation.txt.j2 | 41 ++
dev/archery/archery/utils/report.py | 64 ++++
dev/archery/setup.py | 4 +-
dev/release/00-prepare.sh | 4 +-
dev/release/changelog.py | 247 ------------
dev/release/post-03-website.sh | 1 +
dev/release/rat_exclude_files.txt | 2 +
dev/tasks/crossbow.py | 136 +------
12 files changed, 651 insertions(+), 395 deletions(-)
diff --git a/dev/archery/MANIFEST.in b/dev/archery/MANIFEST.in
new file mode 100644
index 0000000..90fe034
--- /dev/null
+++ b/dev/archery/MANIFEST.in
@@ -0,0 +1,4 @@
+include ../../LICENSE.txt
+include ../../NOTICE.txt
+
+include archery/reports/*
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 7cb2ed2..490e628 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -22,6 +22,7 @@ import errno
import json
import logging
import os
+import pathlib
import sys
from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
@@ -833,5 +834,106 @@ def docker_compose_images(obj):
click.echo(' - {}'.format(image))
+@archery.group('release')
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory.")
+@click.option("--jira-cache", type=click.Path(), default=None,
+ help="File path to cache queried JIRA issues per version.")
+@click.pass_obj
+def release(obj, src, jira_cache):
+ """Release releated commands."""
+ from .release import Jira, CachedJira
+
+ jira = Jira()
+ if jira_cache is not None:
+ jira = CachedJira(jira_cache, jira=jira)
+
+ obj['jira'] = jira
+ obj['repo'] = src.path
+
+
+@release.command('curate')
+@click.argument('version')
+@click.pass_obj
+def release_curate(obj, version):
+ """Release curation."""
+ from .release import Release
+
+ release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
+ curation = release.curate()
+
+ click.echo(curation.render('console'))
+
+
+@release.group('changelog')
+def release_changelog():
+ """Release changelog."""
+ pass
+
+
+@release_changelog.command('add')
+@click.argument('version')
+@click.pass_obj
+def release_changelog_add(obj, version):
+ """Prepend the changelog with the current release"""
+ from .release import Release
+
+ jira, repo = obj['jira'], obj['repo']
+
+ # just handle the current version
+ release = Release.from_jira(version, jira=jira, repo=repo)
+ if release.is_released:
+ raise ValueError('This version has been already released!')
+
+ changelog = release.changelog()
+ changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+
+ current_content = changelog_path.read_text()
+ new_content = changelog.render('markdown') + current_content
+
+ changelog_path.write_text(new_content)
+ click.echo("CHANGELOG.md is updated!")
+
+
+@release_changelog.command('regenerate')
+@click.pass_obj
+def release_changelog_regenerate(obj):
+ """Regeneretate the whole CHANGELOG.md file"""
+ from .release import Release
+
+ jira, repo = obj['jira'], obj['repo']
+ changelogs = []
+
+ for version in jira.arrow_versions():
+ if not version.released:
+ continue
+ release = Release.from_jira(version, jira=jira, repo=repo)
+ click.echo('Querying changelog for version: {}'.format(version))
+ changelogs.append(release.changelog())
+
+ click.echo('Rendering new CHANGELOG.md file...')
+ changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+ with changelog_path.open('w') as fp:
+ for cl in changelogs:
+ fp.write(cl.render('markdown'))
+
+
+@release.command('cherry-pick')
+@click.pass_obj
+def release_cherry_pick(obj):
+ """Cherry pick commits."""
+ from .release import PatchRelease
+
+ release = obj['release']
+ if not isinstance(release, PatchRelease):
+ raise click.UsageError('Cherry-pick command only supported for patch '
+ 'releases')
+
+ commands = release.generate_update_branch_commands()
+ for cmd in commands:
+ click.echo(cmd)
+
+
if __name__ == "__main__":
archery(obj={})
diff --git a/dev/archery/archery/release.py b/dev/archery/archery/release.py
new file mode 100644
index 0000000..cdbf372
--- /dev/null
+++ b/dev/archery/archery/release.py
@@ -0,0 +1,421 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import defaultdict
+import functools
+import os
+import re
+import shelve
+import warnings
+
+from git import Repo
+from jira import JIRA
+from semver import VersionInfo as SemVer
+
+from .utils.source import ArrowSources
+from .utils.report import JinjaReport
+
+
+def cached_property(fn):
+ return property(functools.lru_cache(maxsize=1)(fn))
+
+
+class JiraVersion(SemVer):
+
+ __slots__ = SemVer.__slots__ + ('released', 'release_date')
+
+ def __init__(self, original_jira_version):
+ super().__init__(**SemVer.parse(original_jira_version.name).to_dict())
+ self.released = original_jira_version.released
+ self.release_date = getattr(original_jira_version, 'releaseDate', None)
+
+
+class JiraIssue:
+
+ def __init__(self, original_jira_issue):
+ self.key = original_jira_issue.key
+ self.type = original_jira_issue.fields.issuetype.name
+ self.summary = original_jira_issue.fields.summary
+
+ @property
+ def project(self):
+ return self.key.split('-')[0]
+
+ @property
+ def number(self):
+ return int(self.key.split('-')[1])
+
+
+class Jira(JIRA):
+
+ def __init__(self, user=None, password=None):
+ user = user or os.environ.get('APACHE_JIRA_USER')
+ password = password or os.environ.get('APACHE_JIRA_PASSWORD')
+ super().__init__(
+ {'server': 'https://issues.apache.org/jira'},
+ basic_auth=(user, password)
+ )
+
+ def arrow_version(self, version_string):
+ # query version from jira to populated with additional metadata
+ versions = self.arrow_versions()
+ # JiraVersion instances are comparable with strings
+ return versions[versions.index(version_string)]
+
+ def arrow_versions(self):
+ versions = []
+ for v in self.project_versions('ARROW'):
+ try:
+ versions.append(JiraVersion(v))
+ except ValueError:
+ # ignore invalid semantic versions like JS-0.4.0
+ continue
+ return sorted(versions, reverse=True)
+
+ def issue(self, key):
+ return JiraIssue(super().issue(key))
+
+ def arrow_issues(self, version):
+ query = "project=ARROW AND fixVersion={}".format(version)
+ issues = self.search_issues(query, maxResults=False)
+ return list(map(JiraIssue, issues))
+
+
+class CachedJira:
+
+ def __init__(self, cache_path, jira=None):
+ self.jira = jira or Jira()
+ self.cache_path = cache_path
+
+ def __getattr__(self, name):
+ attr = getattr(self.jira, name)
+ return self._cached(name, attr) if callable(attr) else attr
+
+ def _cached(self, name, method):
+ def wrapper(*args, **kwargs):
+ key = str((name, args, kwargs))
+ with shelve.open(self.cache_path) as cache:
+ try:
+ result = cache[key]
+ except KeyError:
+ cache[key] = result = method(*args, **kwargs)
+ return result
+ return wrapper
+
+
+_TITLE_REGEX = re.compile(
+ r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*"
+ r"(?P<components>\[.*\])?\s*(?P<summary>.*)"
+)
+_COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]")
+
+
+class CommitTitle:
+
+ def __init__(self, summary, project=None, issue=None, components=None):
+ self.project = project
+ self.issue = issue
+ self.components = components or []
+ self.summary = summary
+
+ def __str__(self):
+ out = ""
+ if self.issue:
+ out += "{}: ".format(self.issue)
+ if self.components:
+ for component in self.components:
+ out += "[{}]".format(component)
+ out += " "
+ out += self.summary
+ return out
+
+ @classmethod
+ def parse(cls, headline):
+ matches = _TITLE_REGEX.match(headline)
+ if matches is None:
+ warnings.warn(
+ "Unable to parse commit message `{}`".format(headline)
+ )
+ return CommitTitle(headline)
+
+ values = matches.groupdict()
+ components = values.get('components') or ''
+ components = _COMPONENT_REGEX.findall(components)
+
+ return CommitTitle(
+ values['summary'],
+ project=values.get('project'),
+ issue=values.get('issue'),
+ components=components
+ )
+
+
+class Commit:
+
+ def __init__(self, wrapped):
+ self._title = CommitTitle.parse(wrapped.summary)
+ self._wrapped = wrapped
+
+ def __getattr__(self, attr):
+ if hasattr(self._title, attr):
+ return getattr(self._title, attr)
+ else:
+ return getattr(self._wrapped, attr)
+
+ def __repr__(self):
+ template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>'
+ return template.format(self.hexsha, self.issue, self.components,
+ self.summary)
+
+ @property
+ def url(self):
+ return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha)
+
+ @property
+ def title(self):
+ return self._title
+
+
+class ReleaseCuration(JinjaReport):
+ templates = {
+ 'console': 'release_curation.txt.j2'
+ }
+ fields = [
+ 'release',
+ 'within',
+ 'outside',
+ 'nojira',
+ 'parquet',
+ 'nopatch'
+ ]
+
+
+class JiraChangelog(JinjaReport):
+ templates = {
+ 'markdown': 'release_changelog.md.j2',
+ 'html': 'release_changelog.html.j2'
+ }
+ fields = [
+ 'release',
+ 'categories'
+ ]
+
+
+class Release:
+
+ def __init__(self):
+ raise TypeError("Do not initialize Release class directly, use "
+ "Release.from_jira(version) instead.")
+
+ def __repr__(self):
+ if self.version.released:
+ status = "released_at={!r}".format(self.version.release_date)
+ else:
+ status = "pending"
+ return "<{} {!r} {}>".format(self.__class__.__name__,
+ str(self.version), status)
+
+ @classmethod
+ def from_jira(cls, version, jira=None, repo=None):
+ jira = jira or Jira()
+
+ if repo is None:
+ arrow = ArrowSources.find()
+ repo = Repo(arrow.path)
+ else:
+ repo = Repo(repo)
+
+ if isinstance(version, str):
+ version = jira.arrow_version(version)
+ elif not isinstance(version, JiraVersion):
+ raise TypeError(version)
+
+ # decide the type of the release based on the version number
+ klass = Release if version.patch == 0 else PatchRelease
+
+ # prevent instantiating release object directly
+ obj = klass.__new__(klass)
+ obj.version = version
+ obj.jira = jira
+ obj.repo = repo
+
+ return obj
+
+ @property
+ def is_released(self):
+ return self.version.released
+
+ @property
+ def tag(self):
+ return "apache-arrow-{}".format(str(self.version))
+
+ @property
+ def branch(self):
+ # TODO(kszucs): add apache remote
+ return "master"
+
+ @cached_property
+ def previous(self):
+ # select all non-patch releases
+ versions = [v for v in self.jira.arrow_versions() if v.patch == 0]
+ position = versions.index(self.version) + 1
+ if position == len(versions):
+ # first release doesn't have a previous one
+ return None
+ previous = versions[position]
+ return Release.from_jira(previous)
+
+ @cached_property
+ def issues(self):
+ return {i.key: i for i in self.jira.arrow_issues(self.version)}
+
+ @cached_property
+ def commits(self):
+ """
+ All commits applied between two versions on the master branch.
+ """
+ if self.previous is None:
+ # first release
+ lower = ''
+ else:
+ lower = self.repo.tags[self.previous.tag]
+
+ if self.version.released:
+ upper = self.repo.tags[self.tag]
+ else:
+ try:
+ upper = self.repo.branches[self.branch]
+ except IndexError:
+ warnings.warn("Release branch `{}` doesn't exist."
+ .format(self.branch))
+ return []
+
+ commit_range = "{}..{}".format(lower, upper)
+ return list(map(Commit, self.repo.iter_commits(commit_range)))
+
+ def curate(self):
+ # handle commits with parquet issue key specially and query them from
+ # jira and add it to the issues
+ release_issues = self.issues
+
+ within, outside, nojira, parquet = [], [], [], []
+ for c in self.commits:
+ if c.issue is None:
+ nojira.append(c)
+ elif c.issue in release_issues:
+ within.append((release_issues[c.issue], c))
+ elif c.project == 'PARQUET':
+ parquet.append((self.jira.issue(c.issue), c))
+ else:
+ outside.append((self.jira.issue(c.issue), c))
+
+ # remaining jira tickets
+ within_keys = {i.key for i, c in within}
+ nopatch = [issue for key, issue in release_issues.items()
+ if key not in within_keys]
+
+ return ReleaseCuration(release=self, within=within, outside=outside,
+ nojira=nojira, parquet=parquet, nopatch=nopatch)
+
+ def changelog(self):
+ release_issues = []
+
+ # get organized report for the release
+ curation = self.curate()
+
+ # jira tickets having patches in the release
+ for issue, _ in curation.within:
+ release_issues.append(issue)
+
+ # jira tickets without patches
+ for issue in curation.nopatch:
+ release_issues.append(issue)
+
+ # parquet patches in the release
+ for issue, _ in curation.parquet:
+ release_issues.append(issue)
+
+ # organize issues into categories
+ issue_types = {
+ 'Bug': 'Bug Fixes',
+ 'Improvement': 'New Features and Improvements',
+ 'New Feature': 'New Features and Improvements',
+ 'Sub-task': 'New Features and Improvements',
+ 'Task': 'New Features and Improvements',
+ 'Test': 'Bug Fixes',
+ 'Wish': 'New Features and Improvements',
+ }
+ categories = defaultdict(list)
+ for issue in release_issues:
+ categories[issue_types[issue.type]].append(issue)
+
+ # sort issues by the issue key in ascending order
+ for name, issues in categories.items():
+ issues.sort(key=lambda issue: (issue.project, issue.number))
+
+ return JiraChangelog(release=self, categories=categories)
+
+
+class PatchRelease(Release):
+
+ @property
+ def branch(self):
+ # TODO(kszucs): add apache remote
+ return "maint-{}.{}.x".format(self.version.major, self.version.minor)
+
+ @cached_property
+ def previous(self):
+ # select all releases under this minor
+ versions = [v for v in self.jira.arrow_versions()
+ if v.minor == self.version.minor]
+ previous = versions[versions.index(self.version) + 1]
+ return Release.from_jira(previous)
+
+ def generate_update_branch_commands(self):
+ # cherry pick not yet cherry picked commits on top of the maintenance
+ # branch
+ try:
+ target = self.repo.branches[self.branch]
+ except IndexError:
+ # maintenance branch doesn't exist yet, so create one based off of
+ # the previous git tag
+ target = self.repo.create_head(self.branch, self.previous.tag)
+
+ # collect commits applied on master since the root of the maintenance
+ # branch (the minor release of this patch release)
+ commit_range = "apache-arrow-{}.{}.0..master".format(
+ self.version.major, self.version.minor
+ )
+ commits = list(map(Commit, self.repo.iter_commits(commit_range)))
+
+ # iterate over commits applied on master and keep the original order of
+ # the commits to minimize the merge conflicts during cherry-picks
+ patch_commits = [c for c in commits if c.issue in self.issues]
+
+ commands = [
+ 'git checkout -b {} {}'.format(target, self.previous.tag)
+ ]
+ for c in reversed(patch_commits):
+ commands.append(
+ 'git cherry-pick {} # {}'.format(c.hexsha, c.title)
+ )
+
+ return commands
+
+ # TODO(kszucs): update_branch method which tries to cherry pick to a
+ # temporary branch and if the patches apply cleanly then update the maint
+ # reference
diff --git a/dev/release/update-changelog.sh b/dev/archery/archery/templates/release_changelog.md.j2
old mode 100755
new mode 100644
similarity index 71%
rename from dev/release/update-changelog.sh
rename to dev/archery/archery/templates/release_changelog.md.j2
index 4fda016..c0406dd
--- a/dev/release/update-changelog.sh
+++ b/dev/archery/archery/templates/release_changelog.md.j2
@@ -1,5 +1,4 @@
-#!/bin/bash
-#
+{#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,16 +15,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-#
-set -e
-
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+#}
+# Apache Arrow {{ release.version }} ({{ release.version.release_date or today() }})
-version=$1
+{% for category, issues in categories.items() -%}
-CHANGELOG=$SOURCE_DIR/../../CHANGELOG.md
+## {{ category }}
-${PYTHON:-python} $SOURCE_DIR/changelog.py $version 0 $CHANGELOG
+{% for issue in issues -%}
+* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ issue.summary | md }}
+{% endfor %}
-git add $CHANGELOG
-git commit -m "[Release] Update CHANGELOG.md for $version"
+{% endfor %}
diff --git a/dev/archery/archery/templates/release_curation.txt.j2 b/dev/archery/archery/templates/release_curation.txt.j2
new file mode 100644
index 0000000..a5d11e9
--- /dev/null
+++ b/dev/archery/archery/templates/release_curation.txt.j2
@@ -0,0 +1,41 @@
+{#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#}
+Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }}
+
+Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }}
+
+Patches with assigned issue in version {{ release.version }}:
+{% for issue, commit in within -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches with assigned issue outside of version {{ release.version }}:
+{% for issue, commit in outside -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches in version {{ release.version }} without a linked issue:
+{% for commit in nojira -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+JIRA issues in version {{ release.version }} without a linked patch:
+{% for issue in nopatch -%}
+ - https://issues.apache.org/jira/browse/{{ issue.key }}
+{% endfor %}
diff --git a/dev/archery/archery/utils/report.py b/dev/archery/archery/utils/report.py
new file mode 100644
index 0000000..6c7587d
--- /dev/null
+++ b/dev/archery/archery/utils/report.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABCMeta, abstractmethod
+import datetime
+
+import jinja2
+
+
+def markdown_escape(s):
+ for char in ('*', '#', '_', '~', '`', '>'):
+ s = s.replace(char, '\\' + char)
+ return s
+
+
+class Report(metaclass=ABCMeta):
+
+ def __init__(self, **kwargs):
+ for field in self.fields:
+ if field not in kwargs:
+ raise ValueError('Missing keyword argument {}'.format(field))
+ self._data = kwargs
+
+ def __getattr__(self, key):
+ return self._data[key]
+
+ @abstractmethod
+ def fields(self):
+ pass
+
+ @property
+ @abstractmethod
+ def templates(self):
+ pass
+
+
+class JinjaReport(Report):
+
+ def __init__(self, **kwargs):
+ self.env = jinja2.Environment(
+ loader=jinja2.PackageLoader('archery', 'templates')
+ )
+ self.env.filters['md'] = markdown_escape
+ self.env.globals['today'] = datetime.date.today
+ super().__init__(**kwargs)
+
+ def render(self, template_name):
+ template_path = self.templates[template_name]
+ template = self.env.get_template(template_path)
+ return template.render(**self._data)
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index dd1b555..8823ace 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -27,7 +27,8 @@ if sys.version_info < (3, 5):
extras = {
'benchmark': ['pandas'],
'bot': ['ruamel.yaml', 'pygithub'],
- 'docker': ['ruamel.yaml', 'python-dotenv']
+ 'docker': ['ruamel.yaml', 'python-dotenv'],
+ 'release': ['jinja2', 'jira', 'semver', 'gitpython']
}
extras['all'] = list(set(functools.reduce(operator.add, extras.values())))
@@ -45,6 +46,7 @@ setup(
'archery.lang',
'archery.utils'
],
+ include_package_data=True,
install_requires=['click>=7'],
tests_require=['pytest', 'responses'],
extras_require=extras,
diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh
index 2e357f0..7b054e9 100755
--- a/dev/release/00-prepare.sh
+++ b/dev/release/00-prepare.sh
@@ -195,7 +195,9 @@ tag=apache-arrow-${version}
if [ ${PREPARE_CHANGELOG} -gt 0 ]; then
echo "Updating changelog for $version"
# Update changelog
- $SOURCE_DIR/update-changelog.sh $version
+ archery release changelog add $version
+ git add ${SOURCE_DIR}/../../CHANGELOG.md
+ git commit -m "[Release] Update CHANGELOG.md for $version"
fi
if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
deleted file mode 100755
index 4e85e46..0000000
--- a/dev/release/changelog.py
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Utility for generating changelogs for fix versions
-# requirements: pip install jira
-# Set $JIRA_USERNAME, $JIRA_PASSWORD environment variables
-
-from __future__ import print_function
-
-from collections import defaultdict
-from datetime import datetime
-from io import StringIO
-import locale
-import os
-import re
-import sys
-
-import jira.client
-
-# ASF JIRA username
-JIRA_USERNAME = os.environ["APACHE_JIRA_USERNAME"]
-# ASF JIRA password
-JIRA_PASSWORD = os.environ["APACHE_JIRA_PASSWORD"]
-
-JIRA_API_BASE = "https://issues.apache.org/jira"
-
-asf_jira = jira.client.JIRA(options={'server': JIRA_API_BASE},
- basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
-
-
-locale.setlocale(locale.LC_ALL, 'en_US.utf-8')
-
-
-release_dir = os.path.realpath(os.path.dirname(__file__))
-ARROW_ROOT_DEFAULT = os.path.join(release_dir, '..', '..')
-ARROW_ROOT = os.environ.get("ARROW_ROOT", ARROW_ROOT_DEFAULT)
-
-
-def get_issues_for_version(version):
- jql = ("project=ARROW "
- "AND fixVersion='{0}' "
- "AND status = Resolved "
- "AND resolution in (Fixed, Done) "
- "ORDER BY issuetype DESC").format(version)
-
- return asf_jira.search_issues(jql, maxResults=9999)
-
-
-def get_last_major_version(current_version):
- # TODO: This doesn't work for generating a changelog for the _first_ major
- # release, but we probably don't care
- major_versions = [
- v for v in asf_jira.project('ARROW').versions
- if v.name[0].isdigit() and v.name.split('.')[-1] == '0'
- ]
-
- # Sort the versions
- def sort_version(x):
- major, minor, patch = x.name.split('.')
- return int(major), int(minor)
-
- major_versions.sort(key=sort_version)
-
- # Find index of version being released
- current_version_index = ([x.name for x in major_versions]
- .index(current_version))
-
- return major_versions[current_version_index - 1]
-
-
-def get_jiras_from_git_changelog(current_version):
- # We use this to get the resolved PARQUET JIRAs
- from subprocess import check_output
-
- last_major_version = get_last_major_version(current_version)
-
- # Path to .git directory
- git_dir = os.path.join(ARROW_ROOT, '.git')
-
- cmd = ['git', '--git-dir', git_dir, 'log', '--pretty=format:%s',
- 'apache-arrow-{}..apache-arrow-{}'.format(last_major_version,
- current_version)]
- output = check_output(cmd).decode('utf-8')
-
- resolved_jiras = []
- regex = re.compile(r'[a-zA-Z]+-[0-9]+')
- for desc in output.splitlines():
- maybe_jira = desc.split(':')[0]
-
- # Sometimes people forget the colon
- maybe_jira = maybe_jira.split(' ')[0]
- if regex.match(maybe_jira):
- resolved_jiras.append(maybe_jira)
-
- return resolved_jiras
-
-
-LINK_TEMPLATE = '[{0}](https://issues.apache.org/jira/browse/{0})'
-
-
-def format_changelog_markdown(issues, out):
- issues_by_type = defaultdict(list)
- for issue in issues:
- issues_by_type[issue.fields.issuetype.name].append(issue)
-
- for issue_type, issue_group in sorted(issues_by_type.items()):
- issue_group.sort(key=lambda x: x.key)
-
- out.write('## {0}\n\n'.format(_escape_for_markdown(issue_type)))
- for issue in issue_group:
- markdown_summary = _escape_for_markdown(issue.fields.summary)
- out.write('* {0} - {1}\n'.format(issue.key,
- markdown_summary))
- out.write('\n')
-
-
-def _escape_for_markdown(x):
- return (
- x.replace('_', r'\_') # underscores
- .replace('`', r'\`') # backticks
- .replace('*', r'\*') # asterisks
- )
-
-
-def format_changelog_website(issues, out):
- NEW_FEATURE = 'New Features and Improvements'
- BUGFIX = 'Bug Fixes'
-
- CATEGORIES = {
- 'New Feature': NEW_FEATURE,
- 'Improvement': NEW_FEATURE,
- 'Wish': NEW_FEATURE,
- 'Task': NEW_FEATURE,
- 'Test': BUGFIX,
- 'Bug': BUGFIX,
- 'Sub-task': NEW_FEATURE
- }
-
- issues_by_category = defaultdict(list)
- for issue in issues:
- issue_type = issue.fields.issuetype.name
- website_category = CATEGORIES[issue_type]
- issues_by_category[website_category].append(issue)
-
- WEBSITE_ORDER = [NEW_FEATURE, BUGFIX]
-
- for issue_category in WEBSITE_ORDER:
- issue_group = issues_by_category[issue_category]
- issue_group.sort(key=lambda x: x.key)
-
- out.write('## {0}\n\n'.format(issue_category))
- for issue in issue_group:
- name = LINK_TEMPLATE.format(issue.key)
- markdown_summary = _escape_for_markdown(issue.fields.summary)
- out.write('* {0} - {1}\n'
- .format(name, markdown_summary))
- out.write('\n')
-
-
-def get_resolved_parquet_issues(version):
- git_resolved_jiras = set(get_jiras_from_git_changelog(version))
-
- # We don't assume that resolved Parquet issues are found in a single Fix
- # Version, so for now we query them all and then select only the ones that
- # are found in the git log
- jql = ("project=PARQUET "
- "AND component='parquet-cpp' "
- "AND status = Resolved "
- "AND resolution in (Fixed, Done) "
- "ORDER BY issuetype DESC")
-
- all_issues = asf_jira.search_issues(jql, maxResults=9999)
- return [issue for issue in all_issues if issue.key in git_resolved_jiras]
-
-
-def get_changelog(version, for_website=False):
- issues_for_version = get_issues_for_version(version)
-
- # Infer resolved Parquet issues, since these can only really be known by
- # looking at the git log
- parquet_issues = get_resolved_parquet_issues(version)
- issues_for_version.extend(parquet_issues)
-
- buf = StringIO()
-
- if for_website:
- format_changelog_website(issues_for_version, buf)
- else:
- format_changelog_markdown(issues_for_version, buf)
-
- return buf.getvalue()
-
-
-def append_changelog(version, changelog_path):
- new_changelog = get_changelog(version)
-
- with open(changelog_path, 'r') as f:
- old_changelog = f.readlines()
-
- result = StringIO()
- # Header
- print(''.join(old_changelog[:18]), file=result)
-
- # New version
- today = datetime.today().strftime('%d %B %Y')
- print('# Apache Arrow {0} ({1})'.format(version, today),
- end='', file=result)
- print('\n', file=result)
- print(new_changelog, end='', file=result)
-
- # Prior versions
- print(''.join(old_changelog[19:]), file=result)
-
- with open(changelog_path, 'w') as f:
- f.write(result.getvalue().rstrip() + '\n')
-
-
-if __name__ == '__main__':
- if len(sys.argv) < 2:
- print('Usage: changelog.py $FIX_VERSION [$IS_WEBSITE] '
- '[$CHANGELOG_TO_UPDATE]')
-
- for_website = len(sys.argv) > 2 and sys.argv[2] == '1'
-
- version = sys.argv[1]
-
- if len(sys.argv) > 3:
- changelog_path = sys.argv[3]
- append_changelog(version, changelog_path)
- else:
- print(get_changelog(version, for_website=for_website))
diff --git a/dev/release/post-03-website.sh b/dev/release/post-03-website.sh
index 58121f5..08e5f3a 100755
--- a/dev/release/post-03-website.sh
+++ b/dev/release/post-03-website.sh
@@ -140,6 +140,7 @@ cat <<ANNOUNCE >> "${announce_file}"
ANNOUNCE
+# TODO(kszucs): needs to update for the new changelog generation with archery
${PYTHON:-python} "${SOURCE_DIR}/changelog.py" ${version} 1 | \
sed -e 's/^#/##/g' >> "${announce_file}"
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 158790d..25fff19 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -10,6 +10,7 @@
.github/ISSUE_TEMPLATE/question.md
ci/etc/rprofile
ci/etc/*.patch
+CHANGELOG.md
cpp/CHANGELOG_PARQUET.md
cpp/src/arrow/io/mman.h
cpp/src/arrow/util/random.h
@@ -41,6 +42,7 @@ cpp/src/plasma/thirdparty/dlmalloc.c
cpp/thirdparty/flatbuffers/include/flatbuffers/base.h
cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h
cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h
+dev/archery/MANIFEST.in
dev/archery/requirements*.txt
dev/archery/archery/tests/fixtures/*
dev/release/rat_exclude_files.txt
diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py
index adabe66..cb14de6 100755
--- a/dev/tasks/crossbow.py
+++ b/dev/tasks/crossbow.py
@@ -29,7 +29,7 @@ import textwrap
from io import StringIO
from pathlib import Path
from textwrap import dedent
-from datetime import datetime, date
+from datetime import date
from functools import partial
import click
@@ -66,19 +66,6 @@ requests_log.propagate = True
CWD = Path(__file__).parent.absolute()
-NEW_FEATURE = 'New Features and Improvements'
-BUGFIX = 'Bug Fixes'
-
-
-def md(template, *args, **kwargs):
- """Wraps string.format with naive markdown escaping"""
- def escape(s):
- for char in ('*', '#', '_', '~', '`', '>'):
- s = s.replace(char, '\\' + char)
- return s
- return template.format(*map(escape, args), **toolz.valmap(escape, kwargs))
-
-
def unflatten(mapping):
"""Converts a flat tuple => object mapping to hierarchical one"""
result = {}
@@ -162,95 +149,6 @@ _default_tree = {
}
-class JiraChangelog:
-
- def __init__(self, version, username, password,
- server='https://issues.apache.org/jira'):
- import jira.client
- self.server = server
- # clean version to the first numbers
- self.version = '.'.join(version.split('.')[:3])
- query = ("project=ARROW "
- "AND fixVersion='{0}' "
- "AND status = Resolved "
- "AND resolution in (Fixed, Done) "
- "ORDER BY issuetype DESC").format(self.version)
- self.client = jira.client.JIRA({'server': server},
- basic_auth=(username, password))
- self.issues = self.client.search_issues(query, maxResults=9999)
-
- def format_markdown(self):
- out = StringIO()
-
- issues_by_type = toolz.groupby(lambda i: i.fields.issuetype.name,
- self.issues)
- for typename, issues in sorted(issues_by_type.items()):
- issues.sort(key=lambda x: x.key)
-
- out.write(md('## {}\n\n', typename))
- for issue in issues:
- out.write(md('* {} - {}\n', issue.key, issue.fields.summary))
- out.write('\n')
-
- return out.getvalue()
-
- def format_website(self):
- # jira category => website category mapping
- categories = {
- 'New Feature': 'feature',
- 'Improvement': 'feature',
- 'Wish': 'feature',
- 'Task': 'feature',
- 'Test': 'bug',
- 'Bug': 'bug',
- 'Sub-task': 'feature'
- }
- titles = {
- 'feature': 'New Features and Improvements',
- 'bugfix': 'Bug Fixes'
- }
-
- issues_by_category = toolz.groupby(
- lambda issue: categories[issue.fields.issuetype.name],
- self.issues
- )
-
- out = StringIO()
-
- for category in ('feature', 'bug'):
- title = titles[category]
- issues = issues_by_category[category]
- issues.sort(key=lambda x: x.key)
-
- out.write(md('## {}\n\n', title))
- for issue in issues:
- link = md('[{0}]({1}/browse/{0})', issue.key, self.server)
- out.write(md('* {} - {}\n', link, issue.fields.summary))
- out.write('\n')
-
- return out.getvalue()
-
- def render(self, old_changelog, website=False):
- old_changelog = old_changelog.splitlines()
- if website:
- new_changelog = self.format_website()
- else:
- new_changelog = self.format_markdown()
-
- out = StringIO()
-
- # Apache license header
- out.write('\n'.join(old_changelog[:18]))
-
- # Newly generated changelog
- today = datetime.today().strftime('%d %B %Y')
- out.write(md('\n\n# Apache Arrow {} ({})\n\n', self.version, today))
- out.write(new_changelog)
- out.write('\n'.join(old_changelog[19:]))
-
- return out.getvalue().strip()
-
-
class GitRemoteCallbacks(PygitRemoteCallbacks):
def __init__(self, token):
@@ -1489,38 +1387,6 @@ def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
@crossbow.command()
-@click.option('--changelog-path', '-c', type=click.Path(exists=True),
- default=str(DEFAULT_ARROW_PATH / 'CHANGELOG.md'),
- help='Path of changelog to update')
-@click.option('--arrow-version', '-v', default=None,
- help='Set target version explicitly')
-@click.option('--is-website', '-w', default=False, is_flag=True,
- help='Whether to use website format for changelog. ')
-@click.option('--jira-username', '-u', default=None, help='JIRA username')
-@click.option('--jira-password', '-P', default=None, help='JIRA password')
-@click.option('--dry-run/--write', default=False,
- help='Just display the new changelog, don\'t write it')
-@click.pass_obj
-def changelog(obj, changelog_path, arrow_version, is_website, jira_username,
- jira_password, dry_run):
- changelog_path = Path(changelog_path)
- target = Target.from_repo(obj['arrow'])
- version = arrow_version or target.version
-
- changelog = JiraChangelog(version, username=jira_username,
- password=jira_password)
- new_content = changelog.render(changelog_path.read_text(),
- website=is_website)
-
- if dry_run:
- click.echo(new_content)
- else:
- changelog_path.write_text(new_content)
- click.echo('New changelog successfully generated, see git diff for the'
- 'changes')
-
-
-@crossbow.command()
@click.option('--config-path', '-c',
type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
help='Task configuration yml. Defaults to tasks.yml')