qa/document_analyser.py | 158 ++++++++++++++++++++++++------------------------
1 file changed, 79 insertions(+), 79 deletions(-)
New commits: commit 579b7d79ac016f8947994a53a9d1ed11869901d6 Author: Xisco Fauli <xiscofa...@libreoffice.org> AuthorDate: Fri Sep 24 10:42:02 2021 +0200 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Fri Sep 24 10:52:20 2021 +0200 fix ‘python3\r’: No such file or directory and ... ... remove trailing whitespaces Change-Id: I5a0b0bbf05ba0c72ccaf36dd9d541730b1698e72 diff --git a/qa/document_analyser.py b/qa/document_analyser.py old mode 100644 new mode 100755 index 06bc98a..206c573 --- a/qa/document_analyser.py +++ b/qa/document_analyser.py @@ -1,79 +1,79 @@ -#!/usr/bin/env python3 -# -# This file is part of the LibreOffice project. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# - -""" -Document analyser uses the odfpy module: https://pypi.org/project/odfpy/ - -This script prints: -bookmark count, cell count, changetracking count, character count, -comment count, draw count, frame count, hyperlink count, -image count, non-whitespace character count, object count, OLE object count, -page count, paragraph count, row count, sentence count, -syllable count, table count, textbox count, word count, and paragraph styles. 
- -""" - -import odf -from odf.namespaces import TEXTNS -from odf.element import Element -from odf.opendocument import load -from odf import text,meta,office,draw - - -print("Enter filename: ") -filename=input() - -doc=load(filename) - -#--------------------document statistics from the odf.meta module-------------------- -print("\nDOCUMENT STATISTICS\n") -for stat in doc.getElementsByType(meta.DocumentStatistic): - print("Cell count",stat.getAttribute('cellcount')) - print("Character count:",stat.getAttribute('charactercount')) - print("Draw count:",stat.getAttribute('drawcount')) - print("Frame count:",stat.getAttribute('framecount')) - print("Image count:",stat.getAttribute('imagecount')) - print("Non-whitespace character count:",stat.getAttribute('nonwhitespacecharactercount')) - print("Object count:",stat.getAttribute('objectcount')) - print("Object linking and embedding (OLE) object count:",stat.getAttribute('oleobjectcount')) - print("Page count:",stat.getAttribute('pagecount')) - print("Paragraph count:",stat.getAttribute('paragraphcount')) - print("Row count:",stat.getAttribute('rowcount')) - print("Sentence count:",stat.getAttribute('sentencecount')) - print("Syllable count:",stat.getAttribute('syllablecount')) - print("Table count:",stat.getAttribute('tablecount')) - print("Word count:",stat.getAttribute('wordcount')) - - -#--------------------type counter for attributes not covered by odf.meta.DocumentStatistic-------------------- -def type_counter(doc,type): - count=0 - for element in doc.getElementsByType(type): - count+=1 - return count - -types={ - 'Bookmark':text.Bookmark, - 'Changetracking':text.FormatChange, - 'Comment':office.Annotation, - 'Hyperlink':text.A, - 'Textbox':draw.TextBox -} - -for key,value in types.items(): - print(key,'count:',type_counter(doc,value)) - -#--------------------paragraph styles-------------------- -def paragraph_style(doc): - i = 1 - for paragraph in doc.getElementsByType(text.P): - 
print('Paragraph',i,'style:',paragraph.getAttribute('stylename')) - i+=1 - -paragraph_style(doc) +#!/usr/bin/env python3 +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# + +""" +Document analyser uses the odfpy module: https://pypi.org/project/odfpy/ + +This script prints: +bookmark count, cell count, changetracking count, character count, +comment count, draw count, frame count, hyperlink count, +image count, non-whitespace character count, object count, OLE object count, +page count, paragraph count, row count, sentence count, +syllable count, table count, textbox count, word count, and paragraph styles. + +""" + +import odf +from odf.namespaces import TEXTNS +from odf.element import Element +from odf.opendocument import load +from odf import text,meta,office,draw + + +print("Enter filename: ") +filename=input() + +doc=load(filename) + +#--------------------document statistics from the odf.meta module-------------------- +print("\nDOCUMENT STATISTICS\n") +for stat in doc.getElementsByType(meta.DocumentStatistic): + print("Cell count",stat.getAttribute('cellcount')) + print("Character count:",stat.getAttribute('charactercount')) + print("Draw count:",stat.getAttribute('drawcount')) + print("Frame count:",stat.getAttribute('framecount')) + print("Image count:",stat.getAttribute('imagecount')) + print("Non-whitespace character count:",stat.getAttribute('nonwhitespacecharactercount')) + print("Object count:",stat.getAttribute('objectcount')) + print("Object linking and embedding (OLE) object count:",stat.getAttribute('oleobjectcount')) + print("Page count:",stat.getAttribute('pagecount')) + print("Paragraph count:",stat.getAttribute('paragraphcount')) + print("Row count:",stat.getAttribute('rowcount')) + print("Sentence 
count:",stat.getAttribute('sentencecount')) + print("Syllable count:",stat.getAttribute('syllablecount')) + print("Table count:",stat.getAttribute('tablecount')) + print("Word count:",stat.getAttribute('wordcount')) + + +#--------------------type counter for attributes not covered by odf.meta.DocumentStatistic-------------------- +def type_counter(doc,type): + count=0 + for element in doc.getElementsByType(type): + count+=1 + return count + +types={ + 'Bookmark':text.Bookmark, + 'Changetracking':text.FormatChange, + 'Comment':office.Annotation, + 'Hyperlink':text.A, + 'Textbox':draw.TextBox +} + +for key,value in types.items(): + print(key,'count:',type_counter(doc,value)) + +#--------------------paragraph styles-------------------- +def paragraph_style(doc): + i = 1 + for paragraph in doc.getElementsByType(text.P): + print('Paragraph',i,'style:',paragraph.getAttribute('stylename')) + i+=1 + +paragraph_style(doc)