[PATCH] GSoC Miniproject 15. Rewrite fsck.c:fsck_commit()

2014-03-20 Thread Ashwin Jha
modified fsck.c:fsck_commit(). Replaced memcmp() with starts_with() function.
starts_with() seems much more relevant than memcmp(). It uses one less argument
and its return value makes more sense.
skip_prefix() is not used as it uses strcmp() internally which seems 
unnecessarily
for current task. The current task can be easily done by providing offsets to 
the
buffer pointer (the way it is implemented currently).

Signed-off-by: Ashwin Jha 
---
 fsck.c |   11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..82e1640 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "fsck.h"
+#include "strbuf.h"
 
 static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
 {
@@ -290,12 +291,12 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
int parents = 0;
int err;
 
-   if (memcmp(buffer, "tree ", 5))
+   if (!starts_with(buffer, "tree "))
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'tree' line");
if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' 
line format - bad sha1");
buffer += 46;
-   while (!memcmp(buffer, "parent ", 7)) {
+   while (starts_with(buffer, "parent ")) {
if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 
'parent' line format - bad sha1");
buffer += 48;
@@ -322,15 +323,15 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
if (p || parents)
return error_func(&commit->object, FSCK_ERROR, "parent 
objects missing");
}
-   if (memcmp(buffer, "author ", 7))
+   if (!starts_with(buffer, "author "))
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'author' line");
buffer += 7;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-   if (memcmp(buffer, "committer ", strlen("committer ")))
+   if (!starts_with(buffer, "committer "))
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'committer' line");
-   buffer += strlen("committer ");
+   buffer += 10;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] GSoC Miniproject 15. Rewrite fsck.c:fsck_commit()

2014-03-21 Thread Ashwin Jha


 Original Message 
Subject:[PATCH] GSoC Miniproject 15. Rewrite fsck.c:fsck_commit()
Date:   Fri, 21 Mar 2014 07:24:46 +0530
From:   Ashwin Jha 
To: git@vger.kernel.org
CC: Ashwin Jha 



modified fsck.c:fsck_commit(). Replaced memcmp() with starts_with() function.
starts_with() seems much more relevant than memcmp(). It uses one less argument
and its return value makes more sense.
skip_prefix() is not used as it uses strcmp() internally which seems 
unnecessarily
for current task. The current task can be easily done by providing offsets to 
the
buffer pointer (the way it is implemented currently).

Signed-off-by: Ashwin Jha 
---
 fsck.c |   11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..82e1640 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "fsck.h"
+#include "strbuf.h"
 
 static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)

 {
@@ -290,12 +291,12 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
int parents = 0;
int err;
 
-	if (memcmp(buffer, "tree ", 5))

+   if (!starts_with(buffer, "tree "))
return error_func(&commit->object, FSCK_ERROR, "invalid format - 
expected 'tree' line");
if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' line 
format - bad sha1");
buffer += 46;
-   while (!memcmp(buffer, "parent ", 7)) {
+   while (starts_with(buffer, "parent ")) {
if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 
'parent' line format - bad sha1");
buffer += 48;
@@ -322,15 +323,15 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
if (p || parents)
return error_func(&commit->object, FSCK_ERROR, "parent 
objects missing");
}
-   if (memcmp(buffer, "author ", 7))
+   if (!starts_with(buffer, "author "))
return error_func(&commit->object, FSCK_ERROR, "invalid format - 
expected 'author' line");
buffer += 7;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-   if (memcmp(buffer, "committer ", strlen("committer ")))
+   if (!starts_with(buffer, "committer "))
return error_func(&commit->object, FSCK_ERROR, "invalid format - 
expected 'committer' line");
-   buffer += strlen("committer ");
+   buffer += 10;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
--
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] GSoC Miniproject 15. Rewrite fsck.c:fsck_commit()

2014-03-21 Thread Ashwin Jha


On 03/22/2014 12:11 AM, Eric Sunshine wrote:

On Fri, Mar 21, 2014 at 5:18 AM, Ashwin Jha  wrote:

On Fri, Mar 21, 2014 at 9:03 AM, Eric Sunshine 
wrote:

On Thu, Mar 20, 2014 at 9:54 PM, Ashwin Jha  wrote:

Subject: [PATCH] GSoC Miniproject 15. Rewrite fsck.c:fsck_commit()
starts_with() seems much more relevant than memcmp(). It uses one less
argument and its return value makes more sense.

As a justification, "uses one less argument" falls flat, and really
has nothing to do with the decision to make the change. The bit about
the return value is a slightly better but is still weak.

You might instead justify the change by pointing out that the name
starts_with()
does a better job of conveying the intention of the code, which is to
check the string for a prefix, than does memcmp().

Actually, from the line "starts_with() seems much more relevant than
memcmp()" my intention was to say that "starts_with() does a better job of
conveying the intention of the code, which is to check the string for a
prefix, than does memcmp()" as mentioned by you.

Good to hear. When you resubmit (if you do), perhaps use that wording
or something similar to justify the change.


skip_prefix() is not used as it uses strcmp() internally which seems
unnecessarily
for current task. The current task can be easily done by providing
offsets to the
buffer pointer (the way it is implemented currently).

Not sure what this means. What is the "current task", and what is
implemented where currently?

 From current task, I meant to say the task of offsetting the buffer pointer
to get the correct substring as in:
get_sha1_hex(buffer+5, tree_sha1)

Please forgive me for this. I should have written this in a better way.

Thanks for the clarification.


Signed-off-by: Ashwin Jha 
---
  fsck.c |   11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..82e1640 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
  #include "commit.h"
  #include "tag.h"
  #include "fsck.h"
+#include "strbuf.h"

  static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void
*data)
  {
@@ -290,12 +291,12 @@ static int fsck_commit(struct commit *commit,
fsck_error error_func)
 int parents = 0;
 int err;

-   if (memcmp(buffer, "tree ", 5))
+   if (!starts_with(buffer, "tree "))
 return error_func(&commit->object, FSCK_ERROR, "invalid
format - expected 'tree' line");
 if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')

One of the benefits of starts_with() and skip_prefix() is that they
allow you to eliminate magic numbers, such as 5 in the memcmp()
invocation. However, if you look a couple lines below, you see in the
expression 'buffer+5' that the magic number is still present. In fact,
the code becomes less clear with your change because the 5 in
'buffer+5' is much more mysterious without the preceding
memcmp(foo,"bar",5). It is possible to eliminate this magic number,
but starts_with() is not the answer.


I considered this point while making the changes. But, I thought that since
all that is required is a constant offset to the buffer pointer, using
skip_prefix() will only add to the overhead of function calling.

 return error_func(&commit->object, FSCK_ERROR, "invalid
'tree' line format - bad sha1");
 buffer += 46;

And as you can see here (buffer +=46) will still be a problem even if I
replace the buffer+5 code.
I think a more better way would be to define these magic no. as macros.

But, I guess you are right. The current changes do make it a bit unclear.

I understand your argument: since magic numbers remain elsewhere, then
little is gained by eliminating only a few of them via skip_prefix().
A counterargument might be that even that small gain can be a
maintenance bonus, since it reduces the number of potential places
where errors can be made when modifying the code. (But you are welcome
to counter that argument if you feel strongly about it.)


To summarize, I can think of two ways:
1. skip_prefix() can be used, in place of both starts_with() and memcmp().
The return value of skip_prefix can
 be checked against NULL to determine whether correct format is used or
not.
 Though, even this change will left some of the magic no (as shown
above). ;-)
2. Define macros for all the magic no. (and tags like "tree", "parent"
etc.). This way the code will be more clear
 and any future changes to these magic no. (or tag names) will be much
easier to handle.

Perhaps provide an illustration to explain what you mean.
I think you want some explanation on point 2. What I have suggested here 
is that
all the keywords (like "tree", "parent") and magic no. (which are 
nothing but suitable


[PATCH] Modify fsck_commit. Replace memcmp by skip_prefix

2014-03-22 Thread Ashwin Jha
Replace memcmp by skip_prefix as it serves the dual
purpose of checking the string for a prefix as well
as skipping that prefix.

Signed-off-by: Ashwin Jha 
---
 fsck.c |   25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..021b3fc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "fsck.h"
+#include "git-compat-util.h"
 
 static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
 {
@@ -284,21 +285,23 @@ static int fsck_ident(char **ident, struct object *obj, 
fsck_error error_func)
 
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
-   char *buffer = commit->buffer;
+   char *next_parent, *buffer = commit->buffer;
unsigned char tree_sha1[20], sha1[20];
struct commit_graft *graft;
int parents = 0;
int err;
 
-   if (memcmp(buffer, "tree ", 5))
+   buffer = (char *)skip_prefix(buffer, "tree ");
+   if (buffer == NULL)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'tree' line");
-   if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
+   if (get_sha1_hex(buffer, tree_sha1) || buffer[40] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' 
line format - bad sha1");
-   buffer += 46;
-   while (!memcmp(buffer, "parent ", 7)) {
-   if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
+   buffer += 41;
+   while ((next_parent = (char *)skip_prefix(buffer, "parent ")) != NULL) {
+   buffer = next_parent;
+   if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 
'parent' line format - bad sha1");
-   buffer += 48;
+   buffer += 41;
parents++;
}
graft = lookup_commit_graft(commit->object.sha1);
@@ -322,15 +325,15 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
if (p || parents)
return error_func(&commit->object, FSCK_ERROR, "parent 
objects missing");
}
-   if (memcmp(buffer, "author ", 7))
+   buffer = (char *)skip_prefix(buffer, "author ");
+   if (buffer == NULL)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'author' line");
-   buffer += 7;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-   if (memcmp(buffer, "committer ", strlen("committer ")))
+   buffer = (char *)skip_prefix(buffer, "committer ");
+   if (buffer == NULL)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'committer' line");
-   buffer += strlen("committer ");
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Modify fsck_commit. Replace memcmp by skip_prefix

2014-03-23 Thread Ashwin Jha


Thanks for your comments, Tanay and Eric.

Regarding the [PATCH v2], I will keep this in mind for the subsequent 
patches.


On 03/23/2014 02:10 PM, Eric Sunshine wrote:

Thanks for the resubmission. Comments below...

On Sat, Mar 22, 2014 at 11:12 AM, Ashwin Jha  wrote:

Subject: [PATCH] Modify fsck_commit. Replace memcmp by skip_prefix

In his review, Tanay already mentioned indicating a resubmission via
[PATCH vN]. Additionally, specify the module or function being
modified, followed by a colon and space. On this project, it is
customary to forego capitalization in the subject (and only the
subject). So, you might write:

 Subject: [PATCH v2] fsck_commit: replace memcmp() by skip_prefix()


Replace memcmp by skip_prefix as it serves the dual
purpose of checking the string for a prefix as well
as skipping that prefix.

Good.


Signed-off-by: Ashwin Jha 
---

Tanay mentioned this already: Below the "---" line under your sign-off
is where you should, as a courtesy to reviewers, explain what changed
since the previous attempt. Also, provide a link to the previous
version, like this [1].

[1]: 
http://git.661346.n2.nabble.com/PATCH-GSoC-Miniproject-15-Rewrite-fsck-c-fsck-commit-td7606180.html


  fsck.c |   25 ++---
  1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..021b3fc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
  #include "commit.h"
  #include "tag.h"
  #include "fsck.h"
+#include "git-compat-util.h"

  static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
  {
@@ -284,21 +285,23 @@ static int fsck_ident(char **ident, struct object *obj, 
fsck_error error_func)

  static int fsck_commit(struct commit *commit, fsck_error error_func)
  {
-   char *buffer = commit->buffer;
+   char *next_parent, *buffer = commit->buffer;

The name 'next_parent' doesn't seem correct. After invoking
skip_prefix(), this variable will point at the hex representation of
the parent's SHA1, so a better name might be 'parent_id'.


 unsigned char tree_sha1[20], sha1[20];
 struct commit_graft *graft;
 int parents = 0;
 int err;

-   if (memcmp(buffer, "tree ", 5))
+   buffer = (char *)skip_prefix(buffer, "tree ");

These casts are ugly. It should be possible to get rid of all of them.
Hint: Does this function modify the memory referenced by 'buffer'?
(The answer is very slightly involved, though not difficult. A proper
fix would involve turning this submission into a 2-patch series: one a
preparatory patch, and the other this patch without the casts.)
Eric, certainly this function does not modify the memory referenced by 
'buffer'.

So, 'buffer' should be declared as a const char *.
But, what about the argument to fsck_ident(). First argument required is 
a char **.

Now, the compiler will correctly give a warning for incorrect argument type.

I have seen the code of fsck_ident(), and it is nowhere modifying the 
memory referenced by
buffer. So, I know that this will not cause any problem. But, still it 
will be a bad practice to do away with

warnings.

And can you explain me a bit about a 2-patch series.

Once again thanks for your help.

+   if (buffer == NULL)

On this project, it is customary to say !buffer. Ditto for the
remaining instances.

 return error_func(&commit->object, FSCK_ERROR, "invalid format - 
expected 'tree' line");
-   if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
+   if (get_sha1_hex(buffer, tree_sha1) || buffer[40] != '\n')
 return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' line 
format - bad sha1");
-   buffer += 46;
-   while (!memcmp(buffer, "parent ", 7)) {
-   if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
+   buffer += 41;
+   while ((next_parent = (char *)skip_prefix(buffer, "parent ")) != NULL) {

Likewise, on this project, it is customary to omit the '!= NULL'.


+   buffer = next_parent;
+   if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n')
 return error_func(&commit->object, FSCK_ERROR, "invalid 
'parent' line format - bad sha1");
-   buffer += 48;
+   buffer += 41;
 parents++;
 }
 graft = lookup_commit_graft(commit->object.sha1);
@@ -322,15 +325,15 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
 if (p || parents)
 return error_func(&commit->object, FSCK_ERROR, "parent 
objects missing");
 }
-   if (memcmp(buffer, "author ", 7))
+   buffer = (ch

[PATCH v3 1/2] fsck.c: modify fsck_ident() and fsck_commit()

2014-03-24 Thread Ashwin Jha
In fsck_ident(): Replace argument char **ident with const char **ident
In fsck_commit(): Replace char *buffer with const char *buffer

In both the cases, referenced memory addresses are not modified. So, it
will be a good practice, to declare them as const.

Signed-off-by: Ashwin Jha 
---

Change in fsck_commit() and fsck_ident() as per the discussion with
Eric (follow at [1]).
[1]: 
http://git.661346.n2.nabble.com/PATCH-Modify-fsck-commit-Replace-memcmp-by-skip-prefix-td7606321.html

 fsck.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fsck.c b/fsck.c
index 64bf279..b5f017f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -243,7 +243,7 @@ static int fsck_tree(struct tree *item, int strict, 
fsck_error error_func)
return retval;
 }
 
-static int fsck_ident(char **ident, struct object *obj, fsck_error error_func)
+static int fsck_ident(const char **ident, struct object *obj, fsck_error 
error_func)
 {
char *end;
 
@@ -284,7 +284,7 @@ static int fsck_ident(char **ident, struct object *obj, 
fsck_error error_func)
 
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
-   char *buffer = commit->buffer;
+   const char *buffer = commit->buffer;
unsigned char tree_sha1[20], sha1[20];
struct commit_graft *graft;
int parents = 0;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/2] fsck.c:fsck_commit replace memcmp by skip_prefix

2014-03-24 Thread Ashwin Jha
Replace memcmp by skip_prefix as it serves the dual
purpose of checking the string for a prefix as well
as skipping that prefix.

Signed-off-by: Ashwin Jha 
---

fsck_commit(): After the first patch in this series, it is now safe to replace
memcmp() with skip_prefix().

Previous versions can be found at [v1] and [v2]:
[v1]: 
http://git.661346.n2.nabble.com/PATCH-GSoC-Miniproject-15-Rewrite-fsck-c-fsck-commit-td7606180.html
[v2]: 
http://git.661346.n2.nabble.com/PATCH-Modify-fsck-commit-Replace-memcmp-by-skip-prefix-td7606321.html

 fsck.c | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fsck.c b/fsck.c
index b5f017f..2232ce3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "fsck.h"
+#include "git-compat-util.h"
 
 static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
 {
@@ -284,21 +285,23 @@ static int fsck_ident(const char **ident, struct object 
*obj, fsck_error error_f
 
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
-   const char *buffer = commit->buffer;
+   const char *parent_id, *buffer = commit->buffer;
unsigned char tree_sha1[20], sha1[20];
struct commit_graft *graft;
int parents = 0;
int err;
 
-   if (memcmp(buffer, "tree ", 5))
+   buffer = skip_prefix(buffer, "tree ");
+   if (!buffer)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'tree' line");
-   if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
+   if (get_sha1_hex(buffer, tree_sha1) || buffer[40] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 'tree' 
line format - bad sha1");
-   buffer += 46;
-   while (!memcmp(buffer, "parent ", 7)) {
-   if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
+   buffer += 41;
+   while ((parent_id = skip_prefix(buffer, "parent "))) {
+   buffer = parent_id;
+   if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n')
return error_func(&commit->object, FSCK_ERROR, "invalid 
'parent' line format - bad sha1");
-   buffer += 48;
+   buffer += 41;
parents++;
}
graft = lookup_commit_graft(commit->object.sha1);
@@ -322,15 +325,15 @@ static int fsck_commit(struct commit *commit, fsck_error 
error_func)
if (p || parents)
return error_func(&commit->object, FSCK_ERROR, "parent 
objects missing");
}
-   if (memcmp(buffer, "author ", 7))
+   buffer = skip_prefix(buffer, "author ");
+   if (!buffer)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'author' line");
-   buffer += 7;
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-   if (memcmp(buffer, "committer ", strlen("committer ")))
+   buffer = skip_prefix(buffer, "committer ");
+   if (!buffer)
return error_func(&commit->object, FSCK_ERROR, "invalid format 
- expected 'committer' line");
-   buffer += strlen("committer ");
err = fsck_ident(&buffer, &commit->object, error_func);
if (err)
return err;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html