ChenRocks · leonardyeoxl · Jul 23, 2020
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
+
+# Set up the environment: Python 3.6 from the deadsnakes PPA plus the Perl XML
+# parser package (presumably needed by the ROUGE Perl scripts -- confirm).
+# The apt package lists are removed in the same layer that downloads them so
+# they do not end up in the image.
+# NOTE(review): on Ubuntu 16.04 python3-pip may be tied to the distro python3
+# rather than python3.6 -- confirm which interpreter pip3 installs into.
+RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && apt-get install -y python3.6 python3.6-dev python3-pip libxml-parser-perl && \
+    rm -rf /var/lib/apt/lists/*
+RUN pip3 install --no-cache-dir --upgrade pip
+
+# WORKDIR creates /src when it is missing, so no separate mkdir is needed.
+WORKDIR /src
+# COPY instead of ADD: only a plain local file is copied.
+COPY requirements.txt .
+# torch is installed from the CUDA 9.0 wheel index before the remaining requirements.
+RUN pip3 install --no-cache-dir torch==0.4.0 -f https://download.pytorch.org/whl/cu90/stable
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# NOTE(review): presumably these encodings are what the decoding/evaluation scripts expect -- confirm.
+ENV LANG=C.UTF-8 \
+    PYTHONIOENCODING=latin-1
diff --git a/README.md b/README.md
@@ -71,6 +71,17 @@ Using `acl` you can reproduce the results reported in our paper.
 Using `new` you will get our latest result trained with a newer version of PyTorch library
 which leads to slightly higher scores.
 
+On a Debian/Ubuntu-based Linux system, first install the Perl XML parser package:
+```sh
+apt-get -y update && apt-get install libxml-parser-perl
+```
+
+Add the following environment variables:
+```sh
+export LANG=C.UTF-8
+export PYTHONIOENCODING=latin-1
+```
+
 To decode, run
 ```
 python decode_full_model.py --path=[path/to/save/decoded/files] --model_dir=[path/to/pretrained] --beam=[beam_size] [--test/--val]
@@ -88,6 +99,15 @@ Next, make the reference files for evaluation:
 ```
 python make_eval_references.py
 ```
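+Then build the WordNet exception database that ROUGE uses and export the `ROUGE` environment variable: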
+```sh
+git clone https://github.com/tagucci/pythonrouge.git
+cd pythonrouge/RELEASE-1.5.5/data/
+./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db
+cp WordNet-2.0.exc.db pyrouge/tools/ROUGE-1.5.5/data/
+export ROUGE=pyrouge/tools/ROUGE-1.5.5/data
+```
+
 and then run evaluation by:
 ```
 python eval_full_model.py --[rouge/meteor] --decode_dir=[path/to/save/decoded/files]

diff --git a/decoding.py b/decoding.py
@@ -18,15 +18,15 @@
 from data.data import CnnDmDataset
 
 
-try:
- DATASET_DIR = os.environ['DATA']
-except KeyError:
- print('please use environment variable to specify data directories')
 
 class DecodeDataset(CnnDmDataset):
  """ get the article sentences only (for decoding use)"""
  def __init__(self, split):
  assert split in ['val', 'test']
+ try:
+ DATASET_DIR = os.environ.get('DATA')
+ except KeyError:
+ print('please use environment variable to specify data directories')
  super().__init__(split, DATASET_DIR)
 
  def __getitem__(self, i):

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,6 @@
-pytorch=0.4.0
-gensim
-tensorboardX
-cytoolz
-pyrouge
+# Same torch pin the Dockerfile installs; pip needs `==` for an exact version.
+torch==0.4.0
+gensim==3.8.3
+tensorboardX==2.1
+cytoolz==0.10.1
+pyrouge==0.1.3