Bindings/libgit2sharp/020 2 by grossvin22 · Pull Request #3337 · libgit2/libgit2 · GitHub
[go: up one dir, main page]

Skip to content

Bindings/libgit2sharp/020 2 #3337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
path: Use UTF8 iteration for HFS chars
  • Loading branch information
vmg authored and Edward Thomson committed Dec 17, 2014
commit 0183c4d2590163f1ac2d813d84c33bdf1bb01f68
126 changes: 45 additions & 81 deletions src/path.c
Original file line number Diff line number Diff line change
Expand Up @@ -1282,93 +1282,57 @@ GIT_INLINE(bool) verify_dospath(
component[last] != ':');
}

/*
 * Read the next significant code point of a path component for the
 * HFS ".git" check.
 *
 * UTF-8 sequences are decoded from *in; the cursor and the remaining
 * length are advanced past every sequence consumed, including the
 * ignored code points listed in the switch below.
 *
 * @param in cursor into the component; moved past the bytes consumed
 * @param len bytes remaining at *in; reduced by the bytes consumed
 * @return the code point read, with ASCII 'A'-'Z' folded to lowercase;
 *         0 once the input is exhausted; -1 if the bytes at the cursor
 *         do not decode as UTF-8
 */
static int32_t next_hfs_char(const char **in, size_t *len)
{
	while (*len) {
		int32_t codepoint;
		/* A sequence is at most four bytes long, so clamping the
		 * remaining length keeps the size_t -> int conversion in
		 * range without changing what the decoder can see.
		 */
		int avail = (*len > 4) ? 4 : (int)*len;
		int cp_len = git__utf8_iterate((const uint8_t *)(*in), avail, &codepoint);

		if (cp_len < 0)
			return -1;

		(*in) += cp_len;
		(*len) -= (size_t)cp_len;

		/* these code points are ignored completely */
		switch (codepoint) {
		case 0x200c: /* ZERO WIDTH NON-JOINER */
		case 0x200d: /* ZERO WIDTH JOINER */
		case 0x200e: /* LEFT-TO-RIGHT MARK */
		case 0x200f: /* RIGHT-TO-LEFT MARK */
		case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */
		case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */
		case 0x202c: /* POP DIRECTIONAL FORMATTING */
		case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */
		case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */
		case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */
		case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */
		case 0x206c: /* INHIBIT ARABIC FORM SHAPING */
		case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */
		case 0x206e: /* NATIONAL DIGIT SHAPES */
		case 0x206f: /* NOMINAL DIGIT SHAPES */
		case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */
			continue;
		default:
			break;
		}

		/* Fold into lowercase by hand: only the ASCII range matters,
		 * because the git folder name is composed of ASCII characters,
		 * and tolower() must not be handed values that do not fit in
		 * an unsigned char (any code point above 0xFF would be
		 * undefined behaviour).
		 */
		if (codepoint >= 'A' && codepoint <= 'Z')
			codepoint += 'a' - 'A';

		return codepoint;
	}

	return 0; /* input exhausted -- end of string */
}

/*
 * Check that a path component is not ".git" once read through
 * next_hfs_char(), i.e. with the ignored code points dropped and
 * ASCII letters lowercased.
 *
 * Note the polarity: the result says whether the component is
 * acceptable, not whether it matched.
 *
 * @param path start of the component
 * @param len length of the component in bytes
 * @return false when the component reads exactly as ".git"; true for
 *         anything else, including input that fails to decode
 */
static bool verify_dotgit_hfs(const char *path, size_t len)
{
	/* Any mismatch (or a decode error, reported as -1) short-circuits;
	 * the trailing 0 check rejects longer names such as ".gitx".
	 */
	if (next_hfs_char(&path, &len) != '.' ||
	    next_hfs_char(&path, &len) != 'g' ||
	    next_hfs_char(&path, &len) != 'i' ||
	    next_hfs_char(&path, &len) != 't' ||
	    next_hfs_char(&path, &len) != 0)
		return true;

	return false;
}

GIT_INLINE(bool) verify_char(unsigned char c, unsigned int flags)
Expand Down
76 changes: 76 additions & 0 deletions src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -664,3 +664,79 @@ void git__insertsort_r(
if (freeswap)
git__free(swapel);
}

/*
 * Sequence length implied by each possible lead byte; 0 marks bytes
 * that cannot start a sequence (continuation bytes and 0xF8-0xFF).
 */
static const int8_t utf8proc_utf8class[256] = {
	/* 0x00-0x7F: single-byte (ASCII) */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80-0xBF: continuation bytes, never a lead byte */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0-0xDF: two-byte sequences */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0-0xEF: three-byte sequences */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xF0-0xF7: four-byte sequences; 0xF8-0xFF: invalid */
	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Measure the UTF-8 sequence starting at str without decoding it.
 *
 * @param str first byte of the sequence
 * @param str_len bytes available at str; negative if the string is
 *        NUL-terminated and no bound should be applied
 * @return the sequence length (1-4) on success, otherwise a negative
 *         value: -1 for an empty buffer or a byte that cannot start a
 *         sequence, -str_len when the sequence would run past the
 *         buffer, and -i when byte i is not a continuation byte
 */
int git__utf8_charlen(const uint8_t *str, int str_len)
{
	int length, i;

	/* An empty buffer holds no sequence. Bail out before touching
	 * str[0]; otherwise the truncation path below would report
	 * -0 == 0, which callers testing for "< 0" read as success.
	 */
	if (str_len == 0)
		return -1;

	length = utf8proc_utf8class[str[0]];
	if (!length)
		return -1;

	if (str_len > 0 && length > str_len)
		return -str_len;

	/* Every trailing byte must look like 10xxxxxx. In the unbounded
	 * case a terminating NUL fails this test, so the scan cannot run
	 * past the end of the string.
	 */
	for (i = 1; i < length; i++) {
		if ((str[i] & 0xC0) != 0x80)
			return -i;
	}

	return length;
}

/*
 * Decode the UTF-8 sequence at str into a single code point.
 *
 * *dst is preset to -1 and only overwritten once a sequence has been
 * fully validated. Rejected input: malformed or truncated sequences,
 * overlong encodings, surrogates (U+D800..U+DFFF), U+FDD0..U+FDEF,
 * values of 0x110000 and above, and any code point whose low 16 bits
 * are 0xFFFE or 0xFFFF.
 *
 * @param str first byte of the sequence
 * @param str_len bytes available at str, as understood by
 *        git__utf8_charlen()
 * @param dst receives the decoded code point, or -1 on failure
 * @return number of bytes consumed, or -1 on failure
 */
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
{
	int32_t cp;
	int nbytes;

	*dst = -1;

	nbytes = git__utf8_charlen(str, str_len);
	if (nbytes < 0)
		return -1;

	if (nbytes == 1) {
		cp = str[0];
	} else if (nbytes == 2) {
		cp = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);

		/* overlong encoding of an ASCII value */
		if (cp < 0x80)
			return -1;
	} else if (nbytes == 3) {
		cp = ((str[0] & 0x0F) << 12) |
		     ((str[1] & 0x3F) << 6) |
		      (str[2] & 0x3F);

		/* overlong encoding */
		if (cp < 0x800)
			return -1;
		/* surrogate range */
		if (cp >= 0xD800 && cp < 0xE000)
			return -1;
		/* U+FDD0..U+FDEF */
		if (cp >= 0xFDD0 && cp < 0xFDF0)
			return -1;
	} else if (nbytes == 4) {
		cp = ((str[0] & 0x07) << 18) |
		     ((str[1] & 0x3F) << 12) |
		     ((str[2] & 0x3F) << 6) |
		      (str[3] & 0x3F);

		/* overlong encoding, or beyond the last code point */
		if (cp < 0x10000 || cp >= 0x110000)
			return -1;
	} else {
		/* no usable length was reported */
		return -1;
	}

	/* xxFFFE and xxFFFF are rejected in every plane */
	if ((cp & 0xFFFF) >= 0xFFFE)
		return -1;

	*dst = cp;
	return nbytes;
}
11 changes: 11 additions & 0 deletions src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,17 @@ extern int git__date_rfc2822_fmt(char *out, size_t len, const git_time *date);
*/
extern size_t git__unescape(char *str);

/*
 * Iterate through a UTF-8 string, yielding one
 * codepoint at a time.
 *
 * @param str current position in the string
 * @param str_len size left in the string; -1 if the string is NUL-terminated
 * @param dst pointer where to store the current codepoint; set to -1 when
 *        no valid codepoint could be read
 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
 */
extern int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst);

/*
* Safely zero-out memory, making sure that the compiler
* doesn't optimize away the operation.
Expand Down
0