Skip to content

Commit

Permalink
[#2091] Minor Enhancements to Existing Regex Code (#2115)
Browse files Browse the repository at this point in the history
* Enhance existing Regex code

* Consolidate typical Regex patterns

---------

Co-authored-by: Charisma Kausar <[email protected]>
Co-authored-by: Gokul Rajiv <[email protected]>
Co-authored-by: Marcus Tang <[email protected]>
  • Loading branch information
4 people committed Mar 26, 2024
1 parent cc0651f commit 3140c31
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 28 deletions.
3 changes: 2 additions & 1 deletion src/main/java/reposense/authorship/FileInfoAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import reposense.model.RepoConfiguration;
import reposense.system.LogsManager;
import reposense.util.FileUtil;
import reposense.util.StringsUtil;

/**
* Analyzes the target and information given in the {@link FileInfo}.
Expand Down Expand Up @@ -149,7 +150,7 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F
blameResults = getGitBlameWithPreviousAuthorsResult(config, fileInfo.getPath());
}

String[] blameResultLines = blameResults.split("\n");
String[] blameResultLines = StringsUtil.NEWLINE.split(blameResults);
Path filePath = Paths.get(fileInfo.getPath());
LocalDateTime sinceDate = config.getSinceDate();
LocalDateTime untilDate = config.getUntilDate();
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/reposense/authorship/FileInfoExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import reposense.model.RepoConfiguration;
import reposense.system.LogsManager;
import reposense.util.FileUtil;
import reposense.util.StringsUtil;

/**
* Extracts out all the relevant {@code FileInfo} from the repository.
Expand Down Expand Up @@ -146,9 +147,9 @@ public Set<Path> getFiles(RepoConfiguration repoConfig, boolean isBinaryFile) {
// Gets rid of files with invalid directory name and filters by the {@code isBinaryFile} flag
return modifiedFileList.stream()
.filter(file -> isBinaryFile == file.startsWith(BINARY_FILE_LINE_DIFF_RESULT))
.map(file -> file.split("\t")[2])
.map(file -> StringsUtil.TAB.split(file)[2])
.filter(FileUtil::isValidPathWithLogging)
.map(filteredFile -> Paths.get(filteredFile))
.map(Paths::get)
.collect(Collectors.toCollection(HashSet::new));
}

Expand All @@ -164,7 +165,7 @@ private void setLinesToTrack(FileInfo fileInfo, String fileDiffResult) {
// skips the header, index starts from 1
for (int sectionIndex = 1; sectionIndex < linesChangedChunk.length; sectionIndex++) {
String linesChangedInSection = linesChangedChunk[sectionIndex];
String[] linesChanged = linesChangedInSection.split("\n");
String[] linesChanged = StringsUtil.NEWLINE.split(linesChangedInSection);
int startingLineNumber = getStartingLineNumber(linesChanged[LINE_CHANGED_HEADER_INDEX]);

// mark all untouched lines between sections as untracked
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/reposense/git/GitCatFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import reposense.git.exception.CommitNotFoundException;
import reposense.system.LogsManager;
import reposense.util.StringsUtil;

/**
* Contains git cat file related functionalities.
Expand All @@ -31,7 +32,7 @@ public static List<String> getParentCommits(String root, String commitHash) thro
try {
String output = runCommand(rootPath, catFileCommand);
List<String> parentCommits = new ArrayList<>();
for (String line : output.split("\n")) {
for (String line : StringsUtil.NEWLINE.split(output)) {
if (line.startsWith("parent")) {
parentCommits.add(line.substring(7).trim());
}
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/reposense/git/GitConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import reposense.system.CommandRunner;
import reposense.system.LogsManager;
import reposense.util.StringsUtil;

/**
* Contains git config related functionalities.
Expand All @@ -36,10 +38,12 @@ public class GitConfig {
* @return a list of string arrays where 0-index is key and 1-index is value.
*/
public static List<String[]> getGlobalGitLfsConfig() {
Pattern equals = Pattern.compile("=");

try {
String gitConfig = getGitGlobalConfig();
return Arrays.stream(gitConfig.split("\n"))
.map(line -> line.split("="))
return Arrays.stream(StringsUtil.NEWLINE.split(gitConfig))
.map(equals::split)
.filter(line -> line[0].equals(FILTER_LFS_SMUDGE_KEY) || line[0].equals((FILTER_LFS_PROCESS_KEY)))
.collect(Collectors.toList());
} catch (RuntimeException re) {
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/reposense/git/GitDiff.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import java.util.Arrays;
import java.util.List;

import reposense.util.StringsUtil;


/**
* Contains git diff related functionalities.
* Git diff is responsible for obtaining the changes between commits, commit and working tree, etc.
Expand All @@ -32,6 +35,6 @@ public static List<String> getModifiedFilesList(Path repoRoot) {
String diffCommand = String.format("git diff --ignore-submodules=all --numstat %s %s",
EMPTY_TREE_HASH, CHECKED_OUT_COMMIT_REFERENCE);
String diffResult = runCommand(repoRoot.toAbsolutePath(), diffCommand);
return Arrays.asList(diffResult.split("\n"));
return Arrays.asList(StringsUtil.NEWLINE.split(diffResult));
}
}
5 changes: 3 additions & 2 deletions src/main/java/reposense/git/GitLog.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import reposense.model.Author;
import reposense.model.RepoConfiguration;
import reposense.util.StringsUtil;

/**
* Contains git log related functionalities.
Expand Down Expand Up @@ -69,8 +70,8 @@ public static List<String[]> getFileAuthors(RepoConfiguration config, String fil
command += " " + addQuotesForFilePath(filePath);

String result = runCommand(rootPath, command);
return Arrays.stream(result.split("\n"))
.map(authorAndEmailLine -> authorAndEmailLine.split("\t"))
return Arrays.stream(StringsUtil.NEWLINE.split(result))
.map(StringsUtil.TAB::split)
.map(authorAndEmailArray -> authorAndEmailArray.length == 1
? new String[] {authorAndEmailArray[0], DEFAULT_EMAIL_IF_MISSING}
: authorAndEmailArray)
Expand Down
7 changes: 5 additions & 2 deletions src/main/java/reposense/git/GitRemote.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import java.util.Map;
import java.util.Optional;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import reposense.system.CommandRunner;
import reposense.system.LogsManager;
import reposense.util.StringsUtil;

/**
* Contains git remote related functionality.
Expand All @@ -27,6 +29,7 @@ public class GitRemote {
* @return Map of keys of the form REMOTE_NAME(fetch) or REMOTE_NAME(push) to their corresponding remote URLs.
*/
public static Map<String, String> getRemotes(String repoRoot) {
Pattern anyTabPattern = Pattern.compile("[ \\t]+");
Map<String, String> remotes = new HashMap<>();
String result;
try {
Expand All @@ -36,8 +39,8 @@ public static Map<String, String> getRemotes(String repoRoot) {
return remotes;
}

Arrays.stream(result.split("\n"))
.map(s -> s.split("[ \\t]+"))
Arrays.stream(StringsUtil.NEWLINE.split(result))
.map(anyTabPattern::split)
.forEach(l -> {
if (l.length == 3) {
// l[0]: remote name
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/reposense/git/GitRevList.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import java.util.Arrays;
import java.util.List;

import reposense.util.StringsUtil;


/**
* Contains git rev list related functionalities.
* Git rev list is responsible for showing commit objects in reverse chronological order.
Expand Down Expand Up @@ -106,7 +109,7 @@ public static List<String> getRootCommits(String root) {
String revListCommand = "git rev-list --max-parents=0 HEAD";
Path rootPath = Paths.get(root);
String output = runCommand(rootPath, revListCommand);
return Arrays.asList(output.split("\n"));
return Arrays.asList(StringsUtil.NEWLINE.split(output));
}

/**
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/reposense/git/GitShortlog.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import reposense.model.Author;
import reposense.model.RepoConfiguration;
import reposense.util.StringsUtil;

/**
* Contains git shortlog related functionalities.
Expand All @@ -32,9 +33,9 @@ public static List<Author> getAuthors(RepoConfiguration config) {
return Collections.emptyList();
}

String[] lines = summary.split("\n");
String[] lines = StringsUtil.NEWLINE.split(summary);
return Arrays.stream(lines)
.map(line -> new Author(line.split("\t")[1]))
.map(line -> new Author(StringsUtil.TAB.split(line)[1]))
.collect(Collectors.toList());
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/java/reposense/git/GitShow.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import reposense.git.exception.CommitNotFoundException;
import reposense.model.CommitHash;
import reposense.system.LogsManager;
import reposense.util.StringsUtil;

/**
* Contains git show related functionalities.
Expand All @@ -36,7 +37,7 @@ public static CommitHash getExpandedCommitHash(String root, String shortCommitHa

try {
String output = runCommand(rootPath, showCommand);
List<CommitHash> commitHashes = Arrays.stream(output.split("\n"))
List<CommitHash> commitHashes = Arrays.stream(StringsUtil.NEWLINE.split(output))
.map(CommitHash::new).collect(Collectors.toList());
if (commitHashes.size() > 1) {
logger.warning(String.format("%s can be expanded to %d different commits, "
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/reposense/model/CommitHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.stream.Stream;

import reposense.git.GitRevList;
import reposense.util.StringsUtil;

/**
* Represents a git commit hash in {@code RepoConfiguration}.
Expand Down Expand Up @@ -77,7 +78,7 @@ public static Stream<CommitHash> getHashes(String root, String branchName, Commi

String[] startAndEnd = entry.toString().split("\\.\\.");
String revList = GitRevList.getCommitHashInRange(root, branchName, startAndEnd[0], startAndEnd[1]);
return Arrays.stream(revList.split("\n"))
return Arrays.stream(StringsUtil.NEWLINE.split(revList))
.map(CommitHash::new);
}

Expand Down
9 changes: 5 additions & 4 deletions src/main/java/reposense/util/StringsUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@
* Contains strings related utilities.
*/
public class StringsUtil {

public static final Pattern NEWLINE = Pattern.compile("\n");
public static final Pattern TAB = Pattern.compile("\t");
public static final Pattern NUMERIC = Pattern.compile("^\\d+$");
private static final Pattern SPECIAL_SYMBOLS = Pattern.compile("[@;:&/\\\\!<>{}%#\"\\-='()\\[\\].+*?^$|]");

/**
* Filters the {@code text}, returning only the lines that match the given {@code regex}.
*/
public static String filterText(String text, String regex) {
String[] split = text.split("\n");
StringBuilder sb = new StringBuilder();
Pattern regexPattern = Pattern.compile(regex);

for (String line: split) {
for (String line: NEWLINE.split(text)) {
if (regexPattern.matcher(line).matches()) {
sb.append(line).append("\n");
}
Expand Down Expand Up @@ -91,6 +92,6 @@ public static String removeTrailingBackslash(String string) {
* Returns true iff {@code string} is purely numeric.
*/
public static boolean isNumeric(String string) {
return Pattern.compile("^\\d+$").matcher(string).matches();
return NUMERIC.matcher(string).matches();
}
}
13 changes: 6 additions & 7 deletions src/test/java/reposense/util/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public class TestUtil {
+ ">> %s\n";

private static final String MESSAGE_LINES_LENGTH_DIFFERENT = "The files' lines count do not match.";
private static final String TAB_SPLITTER = "\t";
private static final String MOVED_FILE_INDICATION = "=> ";
private static final int STAT_FILE_PATH_INDEX = 2;

Expand All @@ -54,10 +53,10 @@ public static boolean compareFileContents(Path expected, Path actual, int maxTra

System.out.println(String.format(MESSAGE_COMPARING_FILES, expected, actual));

String[] expectedContent = new String(Files.readAllBytes(expected))
.replace("\r", "").split("\n");
String[] actualContent = new String(Files.readAllBytes(actual))
.replace("\r", "").split("\n");
String[] expectedContent = StringsUtil.NEWLINE.split(new String(Files.readAllBytes(expected))
.replace("\r", ""));
String[] actualContent = StringsUtil.NEWLINE.split(new String(Files.readAllBytes(actual))
.replace("\r", ""));

for (int i = 0; i < Math.min(expectedContent.length, actualContent.length); i++) {
if (!expectedContent[i].equals(actualContent[i])) {
Expand Down Expand Up @@ -197,7 +196,7 @@ public static boolean compareNumberFilesChanged(int expectedNumberFilesChanged,
*/
private static Set<String> getFilesChangedInCommit(String rawCommitInfo) {
Set<String> filesChanged = new HashSet<>();
String[] commitInfo = rawCommitInfo.replaceAll("\n+$", "").split("\n");
String[] commitInfo = StringsUtil.NEWLINE.split(rawCommitInfo.replaceAll("\n+$", ""));
int fileChangedNum = Integer.parseInt(commitInfo[commitInfo.length - 1].trim().split(" ")[0]);
for (int fileNum = 0; fileNum < fileChangedNum; fileNum++) {
filesChanged.add(getFileChanged(commitInfo[commitInfo.length - 2 - fileNum]));
Expand All @@ -209,7 +208,7 @@ private static Set<String> getFilesChangedInCommit(String rawCommitInfo) {
* Returns the file changed given a {@code rawFileChangedString}.
*/
private static String getFileChanged(String rawFileChangedString) {
String fileChanged = rawFileChangedString.split(TAB_SPLITTER)[STAT_FILE_PATH_INDEX].trim();
String fileChanged = StringsUtil.TAB.split(rawFileChangedString)[STAT_FILE_PATH_INDEX].trim();
if (fileChanged.contains(MOVED_FILE_INDICATION)) {
fileChanged = fileChanged.substring(fileChanged.indexOf(MOVED_FILE_INDICATION)
+ MOVED_FILE_INDICATION.length());
Expand Down

0 comments on commit 3140c31

Please sign in to comment.