# seedbox_sync.py
  1. # rsync files from a seedbox to a local machine, exactly once, over SSH.
  2. #
  3. # Why?
  4. # *arr requires that any Remote Path Mappings have a local path reflecting its contents. This can be done with NFS or SSHFS, but those are difficult to set up in containers, and get wonky when the remote server reboots.
  5. # rsync over SSH + cron doesn't care if the remote machine reboots, and easily runs in a container.
  6. # How?
# Usage: seedbox_sync.py my-seedbox /seedbox/path/to/data /local/working /local/metadata /local/data
  8. # - Get all file names in my-seedbox:/seedbox/path/to/data
  9. # - Get all previously processed file names in /local/metadata
  10. # - Diff the above to get newly added files
  11. # - For each new file:
  12. # - Copy file from my-seedbox to /local/working (used in case of transfer failure)
  13. # - Add file name to /local/metadata
  14. # - Move file to /local/data
  15. # */1 * * * * /usr/bin/run-one /usr/bin/python3 /path/to/seedbox_sync.py <seedbox host> /seedbox/path/to/completed/ /local/path/to/downloading /local/path/to/processed /local/path/to/ready 2>&1 | /usr/bin/logger -t seedbox
  16. # Or run it in a k8s cronjob.
import concurrent.futures
import shlex
import subprocess
import sys
  20. if len(sys.argv) != 6:
  21. print("One or more args are undefined")
  22. sys.exit(1)
  23. host, host_data_path, local_working_path, local_metadata_path, local_data_path = sys.argv[1:6]
  24. r = subprocess.run(["ssh", host, "bash", "-c", f"IFS=$'\n'; ls {host_data_path}"], stdout=subprocess.PIPE, check=True)
  25. available = {f for f in r.stdout.decode().split('\n') if f}
  26. # There's better ways to list a dir locally, but using bash & ls again reduces possible formatting discrepencies.
  27. r = subprocess.run(["bash", "-c", f"IFS=$'\n'; ls {local_metadata_path}"], stdout=subprocess.PIPE, check=True)
  28. processed = {f for f in r.stdout.decode().split('\n') if f}
  29. new = available - processed
  30. def process_file(new_file: str) -> None:
  31. # Be super cautious about empty file names, wouldn't want to `rm -rf` a folder by accident
  32. if not new_file:
  33. return
  34. print(f"Processing: {new_file}")
  35. subprocess.run(["rsync", "-rsvv", f'{host}:{host_data_path}/{new_file}', f'{local_working_path}'], check=True)
  36. subprocess.run(["touch", f'{local_metadata_path}/{new_file}'], check=True)
  37. print(f"Moving to ready: {new_file}")
  38. try:
  39. subprocess.run(["mv", f'{local_working_path}/{new_file}', f'{local_data_path}'], check=True)
  40. except:
  41. subprocess.run(["rm", f'{local_metadata_path}/{new_file}'], check=True)
  42. raise
  43. subprocess.run(["rm", "-rf", f'{local_working_path}/{new_file}'], check=True)
  44. with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
  45. future_to_new_files = {executor.submit(process_file, new_file): new_file for new_file in new}
  46. for future in concurrent.futures.as_completed(future_to_new_files):
  47. new_file = future_to_new_files[future]
  48. try:
  49. data = future.result()
  50. print(f"Processed {new_file}")
  51. except Exception as exc:
  52. print('%r generated an exception: %s' % (new_file, exc))