Skip to content

Commit

Permalink
Adds mirror source code
Browse files Browse the repository at this point in the history
  • Loading branch information
cuducos committed Apr 26, 2024
1 parent f0323ab commit 8e75cd8
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 6 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ Você pode [baixar o binário executável](https://prestd.com/) e seguir a [docu

Por exemplo, com `http://localhost:8081/minhareceita/public/cnpj?id=33683111000280`, mas essa resposta é diferente da original:
* Ela é um _array_ e não um objeto
* Ela tem tanto a coluna `id` quanto a `cnpj`
* Ela tem tanto a coluna `id` quanto a `cnpj`

Ou seja, dado que a resposta do pREST seja uma variável `resp`, o resultado de `https://minhareceita.org/33683111000280` deve ser igual a `resp[0].json`.

Expand Down
2 changes: 1 addition & 1 deletion cmd/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ var (
const checksumHelper = `
Checksum of the downloaded files.
Even though the official website of the Brazilian Federal Revenue does not offer
Even though the official website of the Brazilian Federal Revenue does not offer
a checksum for their files, this command can be used to create or check the checksum
of downloaded files.`

Expand Down
1 change: 1 addition & 0 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ func CLI() *cobra.Command {
dropCmd,
transformCLI(),
sampleCLI(),
mirrorCLI(),
} {
rootCmd.AddCommand(c)
}
Expand Down
8 changes: 4 additions & 4 deletions cmd/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ var (
chunkSize int64
skipExistingFiles bool
restart bool
mirror string
useMirror string
)

var downloadCmd = &cobra.Command{
Expand All @@ -51,8 +51,8 @@ var downloadCmd = &cobra.Command{
if err != nil {
return err
}
if mirror != "" {
return download.DownloadFromMirror(mirror, dir, dur, skipExistingFiles, restart, parallelDownloads, downloadRetries, chunkSize)
if useMirror != "" {
return download.DownloadFromMirror(useMirror, dir, dur, skipExistingFiles, restart, parallelDownloads, downloadRetries, chunkSize)
}
return download.Download(dir, dur, skipExistingFiles, restart, parallelDownloads, downloadRetries, chunkSize)
},
Expand Down Expand Up @@ -97,7 +97,7 @@ func downloadCLI() *cobra.Command {
downloadCmd.Flags().IntVarP(&parallelDownloads, "parallel", "p", download.DefaultMaxParallel, "maximum parallel downloads")
downloadCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "c", download.DefaultChunkSize, "max length of the bytes range for each HTTP request")
downloadCmd.Flags().BoolVarP(&restart, "restart", "e", false, "restart all downloads from the beginning")
downloadCmd.Flags().StringVarP(&mirror, "mirror", "m", "", "download from the mirror, not from the original source (YYYY-MM-DD)")
downloadCmd.Flags().StringVarP(&useMirror, "mirror", "m", "", "download from the mirror, not from the original source (YYYY-MM-DD)")
return downloadCmd
}

Expand Down
42 changes: 42 additions & 0 deletions cmd/mirror.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package cmd

import (
"fmt"
"os"

"github.com/cuducos/minha-receita/mirror"
"github.com/spf13/cobra"
)

// mirrorHelper is the long description shown in the help output of the mirror
// command.
const mirrorHelper = `
Mirror of CNPJ files from the Federal Revenue.
Minha Receita maintains a mirror of data from the Federal Revenue CNPJ, in
addition to the executables. This is the web interface for the bucket of these
files.`

// mirrorCmd is the command that starts the mirror web interface. The port is
// resolved in order of precedence: the port variable (bound to the --port flag
// by mirrorCLI), the PORT environment variable and, last, defaultPort. The
// resolved value is stored back in port before being handed to mirror.Mirror.
var mirrorCmd = &cobra.Command{
	Use:   "mirror",
	Short: "Starts the files mirror web interface.",
	Long:  mirrorHelper,
	RunE: func(_ *cobra.Command, _ []string) error {
		fromEnv := os.Getenv("PORT")
		switch {
		case port != "":
			// Explicitly set; nothing to resolve.
		case fromEnv != "":
			port = fromEnv
		default:
			port = defaultPort
		}
		return mirror.Mirror(port)
	},
}

// mirrorCLI binds the --port/-p flag of the mirror command to the port
// variable (empty by default) and returns the command.
func mirrorCLI() *cobra.Command {
	usage := fmt.Sprintf("web server port (default PORT environment variable or %s)", defaultPort)
	flags := mirrorCmd.Flags()
	flags.StringVarP(&port, "port", "p", "", usage)
	return mirrorCmd
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/cuducos/minha-receita
go 1.21

require (
github.com/aws/aws-sdk-go v1.49.7
github.com/cuducos/chunk v1.1.2
github.com/cuducos/go-cnpj v0.1.1
github.com/dgraph-io/badger/v4 v4.2.0
Expand Down Expand Up @@ -34,6 +35,7 @@ require (
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/pgx/v4 v4.18.1 // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/newrelic/go-agent/v3/integrations/logcontext-v2/nrwriter v1.0.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sx
github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.49.7 h1:qQAOWYajSCRQQUFt+OZZ4pgDg2Uf3h4bBQmYzPyyka8=
github.com/aws/aws-sdk-go v1.49.7/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
Expand Down Expand Up @@ -133,6 +135,10 @@ github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dv
github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
Expand Down Expand Up @@ -359,6 +365,8 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down
112 changes: 112 additions & 0 deletions mirror/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package mirror

import (
"bytes"
"encoding/json"
"fmt"
"html/template"
"time"
_ "embed"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)

// cacheExpiration is the age after which isExpired reports a Cache as expired.
const cacheExpiration = 12 * time.Hour

// home holds the contents of index.html, embedded through the go:embed
// directive below; newCache parses it as the "home" template.
//
//go:embed index.html
var home string
// Cache holds the rendered HTML and JSON listings of the mirror together with
// what refresh needs to rebuild them.
//
// The field order matters: newCache builds the value with a positional
// literal.
type Cache struct {
// settings is read by refresh for the region, endpoint, credentials, bucket
// and public domain.
settings settings
// createdAt is set by newCache and again at the end of refresh; isExpired
// compares its age against cacheExpiration.
createdAt time.Time
// template is executed by refresh to produce HTML.
template *template.Template
// HTML is the output of template for the grouped files.
HTML []byte
// JSON is the JSON encoding of a JSONResponse wrapping the grouped files.
JSON []byte
}

// isExpired reports whether more than cacheExpiration has elapsed since the
// cache's createdAt timestamp.
func (c *Cache) isExpired() bool {
	age := time.Since(c.createdAt)
	return age > cacheExpiration
}

// JSONResponse is the envelope that Cache.refresh encodes into Cache.JSON: the
// list of groups under the "data" key.
type JSONResponse struct {
Data []Group `json:"data"`
}

// refresh rebuilds the cached HTML and JSON payloads from the current contents
// of the bucket described by c.settings.
//
// It lists every object of the bucket, following continuation tokens while the
// listing is truncated, groups the resulting files with newGroups, executes
// the template and encodes a JSONResponse. Objects listed without a key are
// skipped. HTML, JSON and createdAt are only replaced once every step has
// succeeded, so a failed refresh leaves the previous payloads in place.
//
// It returns an error if the session cannot be created, if listing the bucket
// fails, if the template fails to execute or if the JSON encoding fails.
// Errors from the SDK and from the JSON encoder are returned unchanged.
func (c *Cache) refresh() error {
	sess, err := session.NewSession(&aws.Config{
		Region:           aws.String(c.settings.region),
		Endpoint:         aws.String(c.settings.endpointURL),
		S3ForcePathStyle: aws.Bool(true),
		Credentials: credentials.NewStaticCredentials(
			c.settings.accessKey,
			c.settings.secretAccessKey,
			"",
		),
	})
	if err != nil {
		return err
	}
	// One client serves every page of the listing.
	sdk := s3.New(sess)

	var fs []File
	var token *string
	for {
		r, err := sdk.ListObjectsV2(&s3.ListObjectsV2Input{
			Bucket:            aws.String(c.settings.bucket),
			ContinuationToken: token,
		})
		if err != nil {
			return err
		}
		for _, obj := range r.Contents {
			if obj == nil || obj.Key == nil {
				continue // nothing to link to without a key
			}
			key := aws.StringValue(obj.Key)
			fs = append(fs, File{
				URL:            fmt.Sprintf("%s%s", c.settings.publicDomain, key),
				Size:           aws.Int64Value(obj.Size),
				name:           key,
				lastModifiedAt: aws.TimeValue(obj.LastModified),
			})
		}
		// The SDK exposes both fields as pointers; the value helpers treat a
		// nil pointer as the zero value instead of panicking.
		if !aws.BoolValue(r.IsTruncated) || r.NextContinuationToken == nil {
			break
		}
		token = r.NextContinuationToken
	}

	data := newGroups(fs)

	var h bytes.Buffer
	if err := c.template.Execute(&h, data); err != nil {
		return fmt.Errorf("rendering mirror HTML: %w", err)
	}

	var j bytes.Buffer
	if err := json.NewEncoder(&j).Encode(JSONResponse{Data: data}); err != nil {
		return err
	}

	// Publish both payloads together, only after everything succeeded.
	c.HTML, c.JSON = h.Bytes(), j.Bytes()
	c.createdAt = time.Now()
	return nil
}

// newCache parses the embedded home template and returns a Cache for the given
// settings, already populated by a first call to refresh. It fails if the
// template does not parse or if that first refresh fails.
func newCache(s settings) (*Cache, error) {
	tmpl, err := template.New("home").Parse(home)
	if err != nil {
		return nil, err
	}
	cache := &Cache{
		settings:  s,
		createdAt: time.Now(),
		template:  tmpl,
		HTML:      []byte{},
		JSON:      []byte{},
	}
	if err = cache.refresh(); err != nil {
		return nil, err
	}
	return cache, nil
}
65 changes: 65 additions & 0 deletions mirror/file.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package mirror

import (
"fmt"
"sort"
"strings"
"time"
)

// unit is the factor between consecutive size prefixes (B, KB, MB, ...).
const unit = 1024

// File describes one object of the mirror. Only URL and Size are exported and
// carry JSON tags; name and lastModifiedAt are internal.
type File struct {
	URL            string `json:"url"`
	Size           int64  `json:"size"`
	name           string
	lastModifiedAt time.Time
}

// HumanReadableSize formats Size with one decimal place and the largest
// 1024-based prefix that fits (for example "1.5 KB"). Sizes below 1024 are
// printed as a plain byte count (for example "512 B").
func (f *File) HumanReadableSize() string {
	const prefixes = "KMGTPE"
	if f.Size < unit {
		return fmt.Sprintf("%d B", f.Size)
	}
	divisor := int64(unit)
	exponent := 0
	remaining := f.Size / unit
	for remaining >= unit {
		divisor *= unit
		exponent++
		remaining /= unit
	}
	scaled := float64(f.Size) / float64(divisor)
	return fmt.Sprintf("%.1f %cB", scaled, prefixes[exponent])
}

// ShortName returns what follows the last "/" of the file name, or the whole
// name when it has no "/".
func (f *File) ShortName() string {
	// LastIndex returns -1 when there is no "/", making the slice start at 0.
	return f.name[strings.LastIndex(f.name, "/")+1:]
}

// group returns what precedes the first "/" of the file name; names without
// any "/" belong to the "Binários" group.
func (f *File) group() string {
	prefix, _, nested := strings.Cut(f.name, "/")
	if !nested {
		return "Binários"
	}
	return prefix
}

// Group is a named list of files. In JSON the files are serialized under the
// "urls" key.
type Group struct {
Name string `json:"name"`
Files []File `json:"urls"`
}

// newGroups buckets fs by File.group and returns one Group per distinct group
// name, ordered by name in descending order. Within a group, files keep the
// order they have in fs.
//
// The result is never nil: with no files it is an empty slice, so that it
// encodes to JSON as [] rather than null.
func newGroups(fs []File) []Group {
	byName := make(map[string][]File)
	for _, f := range fs {
		n := f.group()
		byName[n] = append(byName[n], f)
	}

	// Map iteration order is random; sort the names for a stable result.
	names := make([]string, 0, len(byName))
	for n := range byName {
		names = append(names, n)
	}
	sort.Sort(sort.Reverse(sort.StringSlice(names)))

	gs := make([]Group, 0, len(names))
	for _, n := range names {
		gs = append(gs, Group{Name: n, Files: byName[n]})
	}
	return gs
}
58 changes: 58 additions & 0 deletions mirror/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<!DOCTYPE html>
<html lang="pt-BR">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/semantic-ui/dist/semantic.min.css">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/semantic-ui/dist/semantic.min.js"></script>
<title>Espelho de dados — Minha Receita</title>
</head>
<body>
<div class="ui container">
<h1 class="ui center aligned header">
<img alt="Minha Receita" src="https://docs.minhareceita.org/minha-receita.svg" style="width: auto; height: 7rem; margin: 2rem 0"><br>
Espelho de Dados
</h1>
<div class="ui accordion">
{{ range . }}
<div class="title">
<i class="dropdown icon"></i>
{{ .Name }}
</div>
<div class="content">
<table class="ui single line table">
<thead>
<tr>
<th>Name</th>
<th>Size</th>
</tr>
</thead>
<tbody>
{{ range .Files }}
<tr>
<td><a href="{{ .URL }}">{{ .ShortName }}</a></td>
<td>{{ .HumanReadableSize }}</td>
</tr>
{{ end }}
</tbody>
</table>
</div>
{{ end }}
</div>
<div class="ui divider"></div>
<div>
<p class="right aligned">
<i class="github icon"></i>
Código-fonte:
<a href="https://github.com/cuducos/minha-receita">Minha Receita</a>
</p>
</div>
</div>
<script>
$(document).ready(function(){
$('.ui.accordion').accordion();
});
</script>
</body>
</html>

0 comments on commit 8e75cd8

Please sign in to comment.