Skip to content

hansalemaos/regexfilesearch

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

3 Commits
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Regex search in all kinds of files (in chunks) + binary substitution

pip install regexfilesearch

import regex

from regexfilesearch import regex_filesearch, get_all_files_in_folders_with_subdir_limit

alf = get_all_files_in_folders_with_subdir_limit(
    folders=r"C:\Users\Gamer\anaconda3\envs\stopjogo", maxsubdirs=0
)
df = regex_filesearch(
    files=alf,
    regexpressions=[r"\bchar\b", r"import.*?pandas "],
    with_context=True,
    chunksize=8192,
    flags=regex.IGNORECASE,
)




print(df.loc[df.aa_bytesstart > 1][:20].to_string(max_colwidth=10))
    aa_chunkno    aa_file  aa_bytesstart  aa_bytesend  aa_chunkstart  aa_chunkend   aa_regex aa_regexpattern aa_replace aa_get_context aa_partial_result aa_full_match aa_partial_match  aa_groups  aa_allcaptures aa_result aa_result_utf8  aa_bytesstart_whole  aa_bytesend_whole
12          1   C:\Use...         52             56            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char             52                   56        
13          1   C:\Use...        182            186            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            182                  186        
14          1   C:\Use...        217            221            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            217                  221        
15          1   C:\Use...        254            258            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            254                  258        
16          1   C:\Use...        440            444            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            440                  444        
17          1   C:\Use...        582            586            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            582                  586        
18          1   C:\Use...        622            626            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            622                  626        
19          1   C:\Use...        661            665            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            661                  665        
20          1   C:\Use...         52             56            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char             52                   56        
21          1   C:\Use...        182            186            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            182                  186        
22          1   C:\Use...        217            221            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            217                  221        
23          1   C:\Use...        254            258            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            254                  258        
24          1   C:\Use...        440            444            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            440                  444        
25          1   C:\Use...        582            586            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            582                  586        
26          1   C:\Use...        622            626            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            622                  626        
27          1   C:\Use...        661            665            0          16384    <regex...  regex....       f(newf...        f()          False         (b'#in...          <NA>        ((0, b...          0        b'char'       char            661                  665        
29          1   C:\Use...      20080          20084            0          16384    <regex...  regex....       f(newf...        f()          False         (b'<\x...          <NA>        ((0, b...          0        b'char'       char          20080                20084        
30          1   C:\Use...      20236          20240            0          16384    <regex...  regex....       f(newf...        f()          False         (b'<\x...          <NA>        ((0, b...          0        b'char'       char          20236                20240        
31          1   C:\Use...      20260          20264            0          16384    <regex...  regex....       f(newf...        f()          False         (b'<\x...          <NA>        ((0, b...          0        b'char'       char          20260                20264        
32          1   C:\Use...      21457          21461            0          16384    <regex...  regex....       f(newf...        f()          False         (b'<\x...          <NA>        ((0, b...          0        b'char'       char          21457                21461        




print(df.loc[df.aa_bytesstart > 1][:20].iloc[0].to_string())
aa_chunkno                                                             1
aa_file                C:\Users\Gamer\anaconda3\envs\stopjogo\mademod...
aa_bytesstart                                                         52
aa_bytesend                                                           56
aa_chunkstart                                                          0
aa_chunkend                                                        16384
aa_regex               <regex.Match object; span=(52, 56), match=b'ch...
aa_regexpattern        regex.Regex(b'\\bchar\\b', flags=regex.A | reg...
aa_replace                                f(newfile:str, newbytes:bytes)
aa_get_context                                                       f()
aa_partial_result                                                  False
aa_full_match          (b'#include <stdio.h>\r\n\r\n\r\n\r\n\r\n// np...
aa_partial_match                                                    <NA>
aa_groups                                    ((0, b'char'), ((52, 56),))
aa_allcaptures                                                         0
aa_result                                                        b'char'
aa_result_utf8                                                      char
aa_bytesstart_whole                                                   52
aa_bytesend_whole                                                     56



# df.loc[df.aa_bytesstart > 1][:20].aa_replace.iloc[0]('c:\\testestest.py', 'CHARRRRRRR')
# Substitutes a single match and saves the result under the given new file name; the original file is never changed.
// np=np.byte, c=signed CHARRRRRRR, ctypes=ctypes.c_byte, code=b
// numpy.int8: 8-bit signed integer (-128 to 127).
// Signed integer type, compatible with C char
void cfun_byte(const  signed char  *indatav, size_t size,  signed char  *outdatav )