Skip to content

Commit

Permalink
Merge pull request #8684 from shayn-orca/bugfix-7642
Browse files Browse the repository at this point in the history
Added support for jobs with long filenames
  • Loading branch information
williammartin committed May 6, 2024
2 parents 6f35082 + 61a698a commit 4896546
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 9 deletions.
Binary file modified pkg/cmd/run/view/fixtures/run_log.zip
Binary file not shown.
78 changes: 74 additions & 4 deletions pkg/cmd/run/view/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"strconv"
"strings"
"time"
"unicode/utf16"

"github.com/MakeNowJust/heredoc"
"github.com/cli/cli/v2/api"
Expand Down Expand Up @@ -519,6 +520,8 @@ func promptForJob(prompter shared.Prompter, cs *iostreams.ColorScheme, jobs []sh
return nil, nil
}

const JOB_NAME_MAX_LENGTH = 90

func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
// As described in https://github.com/cli/cli/issues/5011#issuecomment-1570713070, there are a number of steps
// the server can take when producing the downloaded zip file that can result in a mismatch between the job name
Expand All @@ -527,15 +530,82 @@ func logFilenameRegexp(job shared.Job, step shared.Step) *regexp.Regexp {
// * Truncating names that are too long for zip files
// * Adding collision deduplicating numbers for jobs with the same name
//
// We are hesitant to duplicate all the server logic due to the fragility but while we explore our options, it
// is sensible to fix the issue that is unavoidable for users, that when a job uses a composite action, the server
// constructs a job name by constructing a job name of `<JOB_NAME`> / <ACTION_NAME>`. This means that logs will
// never be found for jobs that use composite actions.
// We are hesitant to duplicate all the server logic due to the fragility but it may be unavoidable. Currently, we:
// * Strip `/` which occur when composite action job names are constructed of the form `<JOB_NAME`> / <ACTION_NAME>`
// * Truncate long job names
//
sanitizedJobName := strings.ReplaceAll(job.Name, "/", "")
sanitizedJobName = truncateAsUTF16(sanitizedJobName, JOB_NAME_MAX_LENGTH)
re := fmt.Sprintf(`^%s\/%d_.*\.txt`, regexp.QuoteMeta(sanitizedJobName), step.Number)
return regexp.MustCompile(re)
}

/*
If you're reading this comment by necessity, I'm sorry and if you're reading it for fun, you're welcome, you weirdo.
What is the length of this string "a😅😅"? If you said 9 you'd be right. If you said 3 or 5 you might also be right!
Here's a summary:
"a" takes 1 byte (`\x61`)
"😅" takes 4 `bytes` (`\xF0\x9F\x98\x85`)
"a😅😅" therefore takes 9 `bytes`
In Go `len("a😅😅")` is 9 because the `len` builtin counts `bytes`
In Go `len([]rune("a😅😅"))` is 3 because each `rune` is 4 `bytes` so each character fits within a `rune`
In C# `"a😅😅".Length` is 5 because `.Length` counts `Char` objects, `Chars` hold 2 bytes, and "😅" takes 2 Chars.
But wait, what does C# have to do with anything? Well the server is running C#. Which server? The one that serves log
files to us in `.zip` format of course! When the server is constructing the zip file to avoid running afoul of a 260
byte zip file path length limitation, it applies transformations to various strings in order to limit their length.
In C#, the server truncates strings with this function:
public static string TruncateAfter(string str, int max)
{
string result = str.Length > max ? str.Substring(0, max) : str;
result = result.Trim();
return result;
}
This seems like it would be easy enough to replicate in Go but as we already discovered, the length of a string isn't
as obvious as it might seem. Since C# uses UTF-16 encoding for strings, and Go uses UTF-8 encoding and represents
characters by runes (which are an alias of int32) we cannot simply slice the string without any further consideration.
Instead, we need to encode the string as UTF-16 bytes, slice it and then decode it back to UTF-8.
Interestingly, in C# length and substring both act on the Char type so it's possible to slice into the middle of
a visual, "representable" character. For example we know `"a😅😅".Length` = 5 (1+2+2) and therefore Substring(0,4)
results in the final character being cleaved in two, resulting in "a😅�". Since our int32 runes are being encoded as
2 uint16 elements, we also mimic this behaviour by slicing into the UTF-16 encoded string.
Here's a program you can put into a dotnet playground to see how C# works:
using System;
public class Program {
public static void Main() {
string s = "a😅😅";
Console.WriteLine("{0} {1}", s.Length, s);
string t = TruncateAfter(s, 4);
Console.WriteLine("{0} {1}", t.Length, t);
}
public static string TruncateAfter(string str, int max) {
string result = str.Length > max ? str.Substring(0, max) : str;
return result.Trim();
}
}
This will output:
5 a😅😅
4 a😅�
*/
func truncateAsUTF16(str string, max int) string {
// Encode the string to UTF-16 to count code units
utf16Encoded := utf16.Encode([]rune(str))
if len(utf16Encoded) > max {
// Decode back to UTF-8 up to the max length
str = string(utf16.Decode(utf16Encoded[:max]))
}
return strings.TrimSpace(str)
}

// This function takes a zip file of logs and a list of jobs.
// Structure of zip file
//
Expand Down
38 changes: 33 additions & 5 deletions pkg/cmd/run/view/view_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,8 @@ func TestViewRun(t *testing.T) {
}

// Structure of fixture zip file
// To see the structure of fixture zip file, run:
// `❯ unzip -lv pkg/cmd/run/view/fixtures/run_log.zip`
//
// run log/
// ├── cool job/
Expand Down Expand Up @@ -1487,23 +1489,49 @@ func Test_attachRunLog(t *testing.T) {
// not the double space in the dir name, as the slash has been removed
wantFilename: "cool job with slash/1_fob the barz.txt",
},
{
name: "Job name with really long name (over the ZIP limit)",
job: shared.Job{
Name: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_",
Steps: []shared.Step{{
Name: "Long Name Job",
Number: 1,
}},
},
wantMatch: true,
wantFilename: "thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnineteenchars_thisisnine/1_Long Name Job.txt",
},
{
name: "Job name that would be truncated by the C# server to split a grapheme",
job: shared.Job{
Name: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅",
Steps: []shared.Step{{
Name: "Emoji Job",
Number: 1,
}},
},
wantMatch: true,
wantFilename: "Emoji Test 😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅😅�/1_Emoji Job.txt",
},
}

rlz, err := zip.OpenReader("./fixtures/run_log.zip")
run_log_zip_reader, err := zip.OpenReader("./fixtures/run_log.zip")
require.NoError(t, err)
defer rlz.Close()
defer run_log_zip_reader.Close()

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

attachRunLog(&rlz.Reader, []shared.Job{tt.job})
attachRunLog(&run_log_zip_reader.Reader, []shared.Job{tt.job})

t.Logf("Job details: ")

for _, step := range tt.job.Steps {
log := step.Log
logPresent := log != nil
require.Equal(t, tt.wantMatch, logPresent)
require.Equal(t, tt.wantMatch, logPresent, "log not present")
if logPresent {
require.Equal(t, tt.wantFilename, log.Name)
require.Equal(t, tt.wantFilename, log.Name, "Filename mismatch")
}
}
})
Expand Down

0 comments on commit 4896546

Please sign in to comment.