Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#944] Implement authorship analysis #2030

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b4fc81b
add isAuthorshipAnalyzed flag
SkyBlaise99 Jul 26, 2023
1ea3210
add isAuthorshipAnalyzed to cli arguments
SkyBlaise99 Jul 26, 2023
5e2cdea
reformat cli args
SkyBlaise99 Jul 26, 2023
df5e498
added test cases for args parser
SkyBlaise99 Jul 27, 2023
9b77bda
reformat ArgsParserTest and InputBuilder
SkyBlaise99 Jul 27, 2023
689cdb3
update equals method in cli args
SkyBlaise99 Jul 27, 2023
cb0cf90
pass shouldAnalyzeAuthorship flag from reposense to report generator …
SkyBlaise99 Jul 30, 2023
81b7621
update javadocs for report generator and authorship reporter
SkyBlaise99 Jul 30, 2023
bce2fb2
reformat report generator and authorship reporter
SkyBlaise99 Jul 30, 2023
eb488bc
pass shouldAnalyzeAuthorship flag from authorship reporter to file in…
SkyBlaise99 Aug 1, 2023
e724d47
update javadocs for file info analyzer
SkyBlaise99 Aug 1, 2023
33e393d
add overloading method to fix failing testcases
SkyBlaise99 Aug 1, 2023
72921d7
implement authorship analyzer
SkyBlaise99 Aug 6, 2023
f3fbc33
reformat files
SkyBlaise99 Aug 6, 2023
f7cbe5b
set to default full credit
SkyBlaise99 Aug 7, 2023
03c1a2a
update expected outputs for local repo system tests
SkyBlaise99 Aug 7, 2023
acc6665
update expected outputs for config system tests
SkyBlaise99 Aug 7, 2023
71839dd
update comment
SkyBlaise99 Aug 7, 2023
c8442b8
add test cases for new git methods
SkyBlaise99 Aug 13, 2023
7b69c31
add AuthorshipAnalyzer test cases
SkyBlaise99 Aug 13, 2023
983c784
convert since date to millisec using config's zone id
SkyBlaise99 Aug 13, 2023
dc57517
fix error in obtaining commit time
SkyBlaise99 Aug 13, 2023
cadbeb2
shift getLevenshteinDistance to StringsUtil
SkyBlaise99 Aug 16, 2023
45539c1
fix warnings
SkyBlaise99 Aug 16, 2023
b2d6a08
Merge branch 'master' into 944-analyze-authorship
SkyBlaise99 Aug 17, 2023
397ca6f
store isFullCredit info into segments
SkyBlaise99 Aug 18, 2023
40a7598
store as a single value when the whole segment are all full credit,
SkyBlaise99 Aug 18, 2023
9272869
rename variable
SkyBlaise99 Aug 18, 2023
00c4036
update background for is not full credit
SkyBlaise99 Aug 18, 2023
97f1966
Merge branch 'master' into 944-analyze-authorship
SkyBlaise99 Aug 24, 2023
971cc80
switch to jdk 8 methods
SkyBlaise99 Aug 24, 2023
8293e0f
remove unused imports
SkyBlaise99 Aug 24, 2023
f8a8f30
Merge branch 'master' into 944-analyze-authorship
SkyBlaise99 Sep 1, 2023
489cf6d
fix null pointer exception caused by
SkyBlaise99 Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion frontend/src/components/c-segment.vue
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<template lang="pug">
.segment(
v-bind:class="{ untouched: !segment.knownAuthor, active: isOpen }",
v-bind:class="{ untouched: !segment.knownAuthor, active: isOpen, isNotFullCredit: !segment.isFullCredit }",
v-bind:style="{ 'border-left': `0.25rem solid ${authorColors[segment.knownAuthor]}` }",
v-bind:title="`Author: ${segment.knownAuthor || \"Unknown\"}`"
)
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export interface Repo extends RepoRaw {

/**
 * Shape of one segment of a file as used by the authorship view.
 * NOTE(review): the fields mirror those of the Segment class in utils/segment.ts — confirm the two are
 * meant to stay in sync.
 */
export interface AuthorshipFileSegment {
// Git ID of the author the segment is attributed to, or null when no known author applies.
knownAuthor: string | null;
// Whether the author is given full credit for the segment.
// NOTE(review): c-segment.vue adds the `isNotFullCredit` class when this is false — confirm it reads this type.
isFullCredit: boolean;
// presumably the file line numbers matching the entries of `lines` one-to-one — TODO confirm
lineNumbers: number[];
// presumably the text content of each line in the segment — TODO confirm
lines: string[];
}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types/zod/authorship-type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const lineSchema = z.object({
lineNumber: z.number(),
author: z.object({ gitId: z.string() }),
content: z.string(),
isFullCredit: z.boolean(),
});

const fileResult = z.object({
Expand Down
10 changes: 9 additions & 1 deletion frontend/src/utils/segment.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
export default class Segment {
knownAuthor: string | null;

isFullCredit: boolean;

lineNumbers: Array<number>;

lines: Array<string>;

constructor(knownAuthor: string | null, lineNumbers: Array<number>, lines: Array<string>) {
constructor(
knownAuthor: string | null,
isFullCredit: boolean,
lineNumbers: Array<number>,
lines: Array<string>,
) {
this.knownAuthor = knownAuthor;
this.isFullCredit = isFullCredit;
this.lineNumbers = lineNumbers;
this.lines = lines;
}
Expand Down
11 changes: 10 additions & 1 deletion frontend/src/views/c-authorship.vue
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ export default defineComponent({
splitSegments(lines: Line[]): { segments: Segment[]; blankLineCount: number; } {
// split into segments separated by knownAuthor
let lastState: string | null;
let lastCreditState: boolean;
let lastId = -1;
const segments: Segment[] = [];
let blankLineCount = 0;
Expand All @@ -520,16 +521,19 @@ export default defineComponent({
? !this.isUnknownAuthor(line.author.gitId)
: line.author.gitId === this.info.author;
const knownAuthor = (line.author && isAuthorMatched) ? line.author.gitId : null;
const isFullCredit = line.isFullCredit;

if (knownAuthor !== lastState || lastId === -1) {
if (knownAuthor !== lastState || lastId === -1 || (knownAuthor && isFullCredit !== lastCreditState)) {
segments.push(new Segment(
knownAuthor,
isFullCredit,
[],
[],
));

lastId += 1;
lastState = knownAuthor;
lastCreditState = isFullCredit;
}

const content = line.content || ' ';
Expand Down Expand Up @@ -989,6 +993,11 @@ export default defineComponent({
padding-left: 2rem;
word-break: break-word;
}
&.isNotFullCredit {
.code {
background-color: mui-color('green', '100');
}
}
&.untouched {
$grey: mui-color('grey', '400');
border-left: .25rem solid $grey;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/reposense/RepoSense.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ public static void main(String[] args) {
cliArguments.getSinceDate(), cliArguments.getUntilDate(),
cliArguments.isSinceDateProvided(), cliArguments.isUntilDateProvided(),
cliArguments.getNumCloningThreads(), cliArguments.getNumAnalysisThreads(),
TimeUtil::getElapsedTime, cliArguments.getZoneId(), cliArguments.isFreshClonePerformed());
TimeUtil::getElapsedTime, cliArguments.getZoneId(), cliArguments.isFreshClonePerformed(),
cliArguments.isAuthorshipAnalyzed());

FileUtil.zipFoldersAndFiles(reportFoldersAndFiles, cliArguments.getOutputFilePath().toAbsolutePath(),
".json");
Expand Down
9 changes: 4 additions & 5 deletions src/main/java/reposense/authorship/AuthorshipReporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import reposense.model.RepoConfiguration;
import reposense.system.LogsManager;


/**
* Generates the authorship summary data for each repository.
*/
Expand All @@ -29,24 +28,24 @@ public class AuthorshipReporter {
private final FileInfoAnalyzer fileInfoAnalyzer = new FileInfoAnalyzer();
private final FileResultAggregator fileResultAggregator = new FileResultAggregator();


/**
* Generates and returns the authorship summary for each repo in {@code config}.
* Further analyzes the authorship of each line in the commit if {@code shouldAnalyzeAuthorship} is true.
*/
public AuthorshipSummary generateAuthorshipSummary(RepoConfiguration config) {
public AuthorshipSummary generateAuthorshipSummary(RepoConfiguration config, boolean shouldAnalyzeAuthorship) {
List<FileInfo> textFileInfos = fileInfoExtractor.extractTextFileInfos(config);

int numFiles = textFileInfos.size();
int totalNumLines = textFileInfos.stream()
.mapToInt(fileInfo -> fileInfo.getNumOfLines())
.mapToInt(FileInfo::getNumOfLines)
.sum();

if (totalNumLines > HIGH_NUMBER_LINES_THRESHOLD) {
logger.warning(String.format(HIGH_NUMBER_LINES_MESSAGE, numFiles, totalNumLines));
}

List<FileResult> fileResults = textFileInfos.stream()
.map(fileInfo -> fileInfoAnalyzer.analyzeTextFile(config, fileInfo))
.map(fileInfo -> fileInfoAnalyzer.analyzeTextFile(config, fileInfo, shouldAnalyzeAuthorship))
.filter(Objects::nonNull)
.collect(Collectors.toList());

Expand Down
39 changes: 31 additions & 8 deletions src/main/java/reposense/authorship/FileInfoAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.logging.Logger;

import reposense.authorship.analyzer.AnnotatorAnalyzer;
import reposense.authorship.analyzer.AuthorshipAnalyzer;
import reposense.authorship.model.FileInfo;
import reposense.authorship.model.FileResult;
import reposense.authorship.model.LineInfo;
Expand Down Expand Up @@ -44,10 +45,11 @@ public class FileInfoAnalyzer {
/**
* Analyzes the lines of the file, given in the {@code fileInfo}, that has changed in the time period provided
* by {@code config}.
* Further analyzes the authorship of each line in the commit if {@code shouldAnalyzeAuthorship} is true.
* Returns null if the file is missing from the local system, or none of the
* {@link Author} specified in {@code config} contributed to the file in {@code fileInfo}.
*/
public FileResult analyzeTextFile(RepoConfiguration config, FileInfo fileInfo) {
public FileResult analyzeTextFile(RepoConfiguration config, FileInfo fileInfo, boolean shouldAnalyzeAuthorship) {
String relativePath = fileInfo.getPath();

if (Files.notExists(Paths.get(config.getRepoRoot(), relativePath))) {
Expand All @@ -59,7 +61,7 @@ public FileResult analyzeTextFile(RepoConfiguration config, FileInfo fileInfo) {
return null;
}

aggregateBlameAuthorModifiedAndDateInfo(config, fileInfo);
aggregateBlameAuthorModifiedAndDateInfo(config, fileInfo, shouldAnalyzeAuthorship);
fileInfo.setFileType(config.getFileType(fileInfo.getPath()));

AnnotatorAnalyzer.aggregateAnnotationAuthorInfo(fileInfo, config.getAuthorConfig());
Expand All @@ -71,6 +73,19 @@ public FileResult analyzeTextFile(RepoConfiguration config, FileInfo fileInfo) {
return generateTextFileResult(fileInfo);
}

/**
 * Overloaded method for test cases.
 * <br>
 * Analyzes the lines of the file, given in the {@code fileInfo}, that has changed in the time period provided
 * by {@code config}, without further analyzing the authorship of each line in the commit.
 * Equivalent to calling {@link #analyzeTextFile(RepoConfiguration, FileInfo, boolean)} with
 * {@code shouldAnalyzeAuthorship} set to false.
 * Returns null if the file is missing from the local system, or none of the
 * {@link Author} specified in {@code config} contributed to the file in {@code fileInfo}.
 */
public FileResult analyzeTextFile(RepoConfiguration config, FileInfo fileInfo) {
return analyzeTextFile(config, fileInfo, false);
}

/**
* Analyzes the binary file, given in the {@code fileInfo}, that has changed in the time period provided
* by {@code config}.
Expand Down Expand Up @@ -100,9 +115,8 @@ private FileResult generateTextFileResult(FileInfo fileInfo) {
authorContributionMap.put(author, authorContributionMap.getOrDefault(author, 0) + 1);
}

return FileResult.createTextFileResult(
fileInfo.getPath(), fileInfo.getFileType(), fileInfo.getLines(), authorContributionMap,
fileInfo.exceedsFileLimit());
return FileResult.createTextFileResult(fileInfo.getPath(), fileInfo.getFileType(), fileInfo.getLines(),
authorContributionMap, fileInfo.exceedsFileLimit());
}

/**
Expand Down Expand Up @@ -139,8 +153,10 @@ private FileResult generateBinaryFileResult(RepoConfiguration config, FileInfo f
* The {@code config} is used to obtain the root directory for running git blame as well as other parameters used
* in determining which author to assign to each line and whether to set the last modified date for a
* {@code lineInfo}.
* Further analyzes the authorship of each line in the commit if {@code shouldAnalyzeAuthorship} is true.
*/
private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, FileInfo fileInfo) {
private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, FileInfo fileInfo,
boolean shouldAnalyzeAuthorship) {
String blameResults;

if (!config.isFindingPreviousAuthorsPerformed()) {
Expand All @@ -159,14 +175,14 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F
String authorName = blameResultLines[lineCount + 1].substring(AUTHOR_NAME_OFFSET);
String authorEmail = blameResultLines[lineCount + 2]
.substring(AUTHOR_EMAIL_OFFSET).replaceAll("<|>", "");
Long commitDateInMs = Long.parseLong(blameResultLines[lineCount + 3].substring(AUTHOR_TIME_OFFSET)) * 1000;
long commitDateInMs = Long.parseLong(blameResultLines[lineCount + 3].substring(AUTHOR_TIME_OFFSET)) * 1000;
LocalDateTime commitDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(commitDateInMs),
config.getZoneId());
Author author = config.getAuthor(authorName, authorEmail);

if (!fileInfo.isFileLineTracked(lineCount / 5) || author.isIgnoringFile(filePath)
|| CommitHash.isInsideCommitList(commitHash, config.getIgnoreCommitList())
|| commitDate.compareTo(sinceDate) < 0 || commitDate.compareTo(untilDate) > 0) {
|| commitDate.isBefore(sinceDate) || commitDate.isAfter(untilDate)) {
author = Author.UNKNOWN_AUTHOR;
}

Expand All @@ -179,6 +195,13 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F
fileInfo.setLineLastModifiedDate(lineCount / 5, commitDate);
}
fileInfo.setLineAuthor(lineCount / 5, author);

if (shouldAnalyzeAuthorship && !author.equals(Author.UNKNOWN_AUTHOR)) {
String lineContent = fileInfo.getLine(lineCount / 5 + 1).getContent();
boolean isFullCredit = AuthorshipAnalyzer.analyzeAuthorship(config, fileInfo.getPath(), lineContent,
commitHash, author);
fileInfo.setIsFullCredit(lineCount / 5, isFullCredit);
}
}
}

Expand Down
Loading
Loading