summaryrefslogtreecommitdiffhomepage
path: root/public/v4/apps/paperless-ng.yml
blob: 88ca98b6fa5f8c14d4b3232dad084d258cd593cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# CapRover one-click app template for Paperless-ng.
# Template schema version (this file is served from the v4 apps directory).
captainVersion: 4
# Containers that make up the app: the Paperless-ng web app plus its Redis,
# Postgres, Gotenberg and Tika companions.
services:
    # Paperless-ng web application. Starts after the Postgres and Redis
    # services declared in this file; Gotenberg and Tika are reached through
    # the internal srv-captain-- hostnames configured in the environment below.
    $$cap_appname:
        depends_on:
            - $$cap_appname-db
            - $$cap_appname-redis
        image: jonaswinkler/paperless-ng:$$cap_app_version
        restart: always
        environment:
            APP_URL: http://$$cap_appname.$$cap_root_domain
            # Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
            PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
            PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
            PAPERLESS_DBNAME: $$cap_dbname
            PAPERLESS_DBUSER: $$cap_dbuser
            PAPERLESS_DBPASS: $$cap_dbpass
            # Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
            PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
            # Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
            PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
            PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
            # Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
            PAPERLESS_SECRET_KEY: $$cap_secret_key
            PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
            PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
            PAPERLESS_ADMIN_USER: $$cap_admin_user
            PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
            PAPERLESS_ADMIN_MAIL: $$cap_admin_email
            PAPERLESS_COOKIE_PREFIX: $$cap_cookie_prefix
            # OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
            PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
            PAPERLESS_OCR_MODE: $$cap_ocr_mode
            PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
            PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
            PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
            PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
            PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
            PAPERLESS_OCR_PAGES: $$cap_ocr_pages
            PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
            # NOTE(review): single `$` here, unlike every other placeholder. It
            # matches the variable id declared under caproverOneClickApp, so the
            # two must be renamed together if this is normalized to `$$cap_`.
            PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args
            # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
            # Quoted so the flag stays a string instead of a YAML integer.
            PAPERLESS_TIKA_ENABLED: '1'
            PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
            PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
            # Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
            # The two worker settings stay disabled; see the note next to the
            # commented-out variables for why they cannot be passed blank.
            # PAPERLESS_TASK_WORKERS: $$cap_task_workers
            # PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
            PAPERLESS_TIME_ZONE: $$cap_timezone
            PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
            PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
            PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
            PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
            PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
            PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
            PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
            PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
            PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
            PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
            PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
            PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
            # Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
            PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
            USERMAP_UID: $$cap_usermap_uid
            USERMAP_GID: $$cap_usermap_gid
            PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
        # Named volumes for the four Paperless directories under
        # /usr/src/paperless.
        volumes:
            - $$cap_appname-data:/usr/src/paperless/data
            - $$cap_appname-media:/usr/src/paperless/media
            - $$cap_appname-export:/usr/src/paperless/export
            - $$cap_appname-consume:/usr/src/paperless/consume
        caproverExtra:
            # HTTP port inside the container that CapRover routes traffic to.
            containerHttpPort: '8000'

    # Redis instance that the main service points at through PAPERLESS_REDIS.
    $$cap_appname-redis:
        restart: always
        caproverExtra:
            # Internal-only service: no public web endpoint.
            notExposeAsWebApp: "true"
            # The image is built from these inline Dockerfile lines, with the
            # tag taken from the Redis version variable.
            dockerfileLines:
                - "FROM redis:$$cap_redis_version"
                - "CMD exec redis-server"
        volumes:
            # Named volume mounted at /data inside the container.
            - "$$cap_appname-redis-data:/data"

    # PostgreSQL database for Paperless-ng. Name, user and password come from
    # the same variables the main service passes as PAPERLESS_DB*.
    $$cap_appname-db:
        image: "postgres:$$cap_postgres_version"
        restart: always
        caproverExtra:
            # Internal-only service: no public web endpoint.
            notExposeAsWebApp: "true"
        environment:
            POSTGRES_USER: "$$cap_dbuser"
            POSTGRES_PASSWORD: "$$cap_dbpass"
            POSTGRES_DB: "$$cap_dbname"
        volumes:
            # Named volume holding the Postgres data directory.
            - "$$cap_appname-db:/var/lib/postgresql/data"

    # Gotenberg document-conversion service. The main app reaches it on port
    # 3000 through PAPERLESS_TIKA_GOTENBERG_ENDPOINT.
    $$cap_appname-gotenberg:
        image: thecodingmachine/gotenberg:6
        restart: unless-stopped
        environment:
            # Quoted so the flag stays a string instead of a YAML integer.
            DISABLE_GOOGLE_CHROME: '1'
        caproverExtra:
            containerHttpPort: '3000'
            # Only reached over the internal srv-captain-- hostname, and no
            # credentials are configured for it in this template, so it is not
            # published as a public web app (same as the Redis and DB services).
            notExposeAsWebApp: 'true'

    # Apache Tika server. The main app reaches it on port 9998 through
    # PAPERLESS_TIKA_ENDPOINT.
    $$cap_appname-tika:
        # NOTE(review): the image tag is not pinned, so each deployment pulls
        # whatever the default tag currently resolves to. Consider pinning a
        # version that is known to work with this Paperless-ng release.
        image: apache/tika
        restart: unless-stopped
        caproverExtra:
            containerHttpPort: '9998'
            # Only reached over the internal srv-captain-- hostname, and no
            # credentials are configured for it in this template, so it is not
            # published as a public web app (same as the Redis and DB services).
            notExposeAsWebApp: 'true'

# One-click-app metadata: the input variables, the instructions shown before
# and after deployment, and the catalogue details for this app.
caproverOneClickApp:
    # Each id below appears as a placeholder in the services section; label,
    # description, defaultValue and validRegex describe the input field.
    variables:
        # Image tags for the three versioned containers. All three use the
        # same "no whitespace, no slash" validation.
        - id: $$cap_app_version
          label: Paperless-ng Version
          defaultValue: '1.4.1'
          description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_postgres_version
          label: Postgres Version
          defaultValue: '13'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_redis_version
          label: Redis version
          defaultValue: '6.2.1-alpine'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/redis/tags/
          validRegex: /^([^\s^\/])+$/

        # Database credentials, used by both the Postgres service and the
        # PAPERLESS_DB* settings of the main service.
        - id: $$cap_dbname
          label: Database Name
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbuser
          label: Database User
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbpass
          label: Database Password
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/

        # Paths and folders. An explicit empty string is used instead of a
        # bare `defaultValue:`, which YAML parses as null.
        - id: $$cap_filename_format
          label: Filename Format
          defaultValue: ''
          description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'

        # Logging. Both fields accept digits only; blank is allowed.
        - id: $$cap_logrotate_max_size
          label: Log Rotate Max Size (in bytes)
          validRegex: /^\d{0,}$/
          description: Maximum file size for log files before they are rotated, in bytes.

        - id: $$cap_logrotate_max_backup
          label: Log Rotate Max Backup Count
          defaultValue: '20'
          validRegex: /^\d{0,}$/
          description: Number of rotated log files to keep.

        # Hosting and security inputs: session secret, initial admin account
        # and cookie prefix. The secret and the admin password default to
        # generated random hex strings.
        - id: $$cap_secret_key
          label: 'Paperless Secret Key'
          description: 'Paperless uses this to make session tokens.'
          validRegex: '/^([^\s^\/])+$/'
          defaultValue: '$$cap_gen_random_hex(64)'

        - id: $$cap_admin_user
          label: 'Admin User'
          validRegex: '/^([^\s^\/])+$/'
          defaultValue: 'admin'

        - id: $$cap_admin_password
          label: 'Admin Password'
          validRegex: '/^([^\s^\/])+$/'
          defaultValue: '$$cap_gen_random_hex(10)'

        - id: $$cap_admin_email
          label: 'Admin Email'
          validRegex: '/^([^\s^\/])+$/'
          defaultValue: 'root@localhost'

        - id: $$cap_cookie_prefix
          label: 'Cookie Prefix'
          description: >-
              Specify a prefix that is added to the cookies used by paperless to
              identify the currently logged in user. This is useful for when
              you’re running two instances of paperless on the same host.
          defaultValue: '$$cap_appname'

        # OCR inputs, mapped onto the PAPERLESS_OCR_* settings of the main
        # service. Enumerated options are enforced through validRegex.
        - id: $$cap_ocr_language
          label: OCR Language
          defaultValue: 'eng'
          description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.

        - id: $$cap_ocr_mode
          label: OCR Mode
          defaultValue: 'skip'
          validRegex: /^(skip|skip_noarchive|redo|force)$/
          description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).

        - id: $$cap_ocr_clean
          label: OCR Clean
          defaultValue: 'clean'
          validRegex: /^(clean|clean-final|none)$/
          description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none

        - id: $$cap_ocr_deskew
          label: OCR Deskew
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).

        - id: $$cap_ocr_rotate_pages
          label: OCR Rotate Pages
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).

        - id: $$cap_ocr_rotate_threshold
          label: OCR Rotate Pages Threshold
          defaultValue: '12'
          validRegex: /^\d[\d.]*$/
          description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.

        - id: $$cap_ocr_output_type
          label: OCR Output Type
          defaultValue: 'pdfa'
          validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
          description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3

        - id: $$cap_ocr_pages
          label: OCR Pages Count
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCR’ed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.

        - id: $$cap_ocr_image_dpi
          label: OCR Image DPI
          description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.

        # NOTE(review): this id uses a single `$`, unlike every other variable.
        # It matches the placeholder used for PAPERLESS_OCR_USER_ARGS in the
        # main service, so the two must be renamed together if this is
        # normalized to `$$cap_`.
        - id: $cap_ocr_user_args
          label: OCR User Args
          description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.

        #   commenting out but keeping the code if someone tries to add these variables
        #   these are optional variables and paperless-ng calculates automatically if no value is provided
        #   but leaving it blank raises ValueError: invalid literal for int() with base 10:
        # - id: $$cap_task_workers
        #   label: Paperless Task Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count

        # - id: $$cap_threads_per_worker
        #   label: Paperless Threads per Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count

        # Software-tweak inputs, mapped onto the matching PAPERLESS_* settings
        # of the main service.
        - id: $$cap_timezone
          label: Timezone
          defaultValue: 'UTC'
          description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'

        - id: $$cap_consumer_polling
          label: Consumer Polling (seconds)
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Defaults to 0, which disables polling and uses filesystem notifications.

        - id: $$cap_consumer_delete_duplicates
          label: Consumer Delete Duplicates
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: When the consumer detects a duplicate document, it will not touch the original document.

        - id: $$cap_consumer_recursive
          label: Consumer Recursive
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Enable recursive watching of the consumption directory. Paperless will then pickup files from files in subdirectories within your consumption directory as well.

        - id: $$cap_consumer_subdirs_as_tags
          label: Consumer Sub Directory as tags
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Set the names of subdirectories as tags for consumed files. E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file

        - id: $$cap_convert_memory_limit
          label: Convert Memory Limit
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.

        # Feeds PAPERLESS_CONVERT_TMPDIR; the label previously duplicated the
        # memory-limit field above.
        - id: $$cap_convert_tempdir
          label: Convert Temp Directory
          description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.

        - id: $$cap_optimize_thumbnails
          label: Optimize Thumbnails
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.

        - id: $$cap_pre_consume_script
          label: Pre Consume Script (path)
          description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)

        - id: $$cap_post_consume_script
          label: Post Consume Script (path)
          description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)

        - id: $$cap_filename_date_order
          label: Filename Date Order
          description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.

        - id: $$cap_thumbnail_font_name
          label: Thumbnail Font Name
          defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
          description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that

        - id: $$cap_paperless_ignore_dates
          label: Ignore Dates
          description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)

        # Docker-specific inputs: web worker count, the UID/GID passed as
        # USERMAP_UID / USERMAP_GID, and extra OCR language packs.
        - id: $$cap_webserver_workers
          label: Webserver Workers
          defaultValue: '2'
          validRegex: /^\d{1,}$/
          description: The number of worker processes the webserver should spawn.

        - id: $$cap_usermap_uid
          label: Usermap UID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```

        # `id -g` prints the group ID, so the description says "group ID"
        # rather than repeating the UID wording.
        - id: $$cap_usermap_gid
          label: Usermap GID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```

        - id: $$cap_docker_ocr_languages
          label: OCR Languages to install
          description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.

    # Catalogue entry: display name, summary, upstream documentation link and
    # the official flag.
    displayName: "Paperless-ng"
    description: >-
        Paperless is an application by Daniel Quinn and others that indexes your
        scanned documents and allows you to easily search for documents and
        store metadata alongside your documents.
    documentation: "https://paperless-ng.readthedocs.io/en/latest/"
    isOfficial: true
    # Text for the start and end of the install flow; the end message links to
    # the deployed app.
    instructions:
        start: >-
            Paperless is an application by Daniel Quinn and others that indexes
            your scanned documents and allows you to easily search for documents
            and store metadata alongside your documents.
        end: 'Done! 😄 Your service is available at http://$$cap_appname.$$cap_root_domain'