seedbox_sync.py

# rsync files from a seedbox to a local machine, exactly once, over SSH.
#
# Why?
# *arr requires that any Remote Path Mappings have a local path reflecting their
# contents. This can be done with NFS or SSHFS, but those are difficult to set up
# in containers, and get wonky when the remote server reboots. rsync over SSH +
# cron doesn't care if the remote machine reboots, and runs easily in a container.
#
# How?
# Usage: seedbox_sync.py my-seedbox /seedbox/path/to/data /local/working /local/metadata /local/data
# - Get all file names in my-seedbox:/seedbox/path/to/data
# - Get all previously processed file names in /local/metadata
# - Diff the above to get newly added files
# - For each new file:
#   - Copy the file from my-seedbox to /local/working (used in case of transfer failure)
#   - Add the file name to /local/metadata
#   - Move the file to /local/data
#
# Run it from cron (run-one keeps overlapping invocations from double-transferring):
# */1 * * * * /usr/bin/run-one /usr/bin/python3 /path/to/seedbox_sync.py <seedbox host> /seedbox/path/to/completed/ /local/path/to/downloading /local/path/to/processed /local/path/to/ready 2>&1 | /usr/bin/logger -t seedbox
# Or run it in a k8s cronjob. See seedbox-sync.yaml:
# kubectl -n plex create configmap seedbox-sync --from-file=seedbox_sync.py
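#
# Note: since this runs unattended, SSH to the seedbox is assumed to work
# non-interactively (e.g. key-based auth already set up for the invoking user).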
import concurrent.futures
import subprocess
import sys

if len(sys.argv) != 6:
    print("Usage: seedbox_sync.py <host> <host_data_path> <local_working_path> <local_metadata_path> <local_data_path>")
    sys.exit(1)

host, host_data_path, local_working_path, local_metadata_path, local_data_path = sys.argv[1:6]
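# What each path is for (matching the cron example above):
# - host_data_path: completed downloads on the seedbox
# - local_working_path: in-flight transfers land here first
# - local_metadata_path: holds one empty marker file per already-processed name
# - local_data_path: finished files are moved here for *arr to pick up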
r = subprocess.run(["ssh", host, "bash", "-c", f"IFS=$'\n'; ls {host_data_path}"], stdout=subprocess.PIPE, check=True)
available_files = {f for f in r.stdout.decode().split('\n') if f}

# There are better ways to list a dir locally, but using bash & ls again reduces possible formatting discrepancies.
r = subprocess.run(["bash", "-c", f"IFS=$'\n'; ls {local_metadata_path}"], stdout=subprocess.PIPE, check=True)
processed_files = {f for f in r.stdout.decode().split('\n') if f}

new_files = available_files - processed_files
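# Illustrative example: if the seedbox has {"a.mkv", "b.mkv"} and /local/metadata
# already contains a marker named "a.mkv", then new_files == {"b.mkv"}.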
def process_file(new_file: str) -> None:
    # Be super cautious about empty file names, wouldn't want to `rm -rf` a folder by accident
    if not new_file:
        return

    print(f"Processing: {new_file}")
    # -r: recurse into directories, -s: protect args (no remote word-splitting of
    # names with spaces), -vv: extra verbosity for the logs.
    subprocess.run(["rsync", "-rsvv", f'{host}:{host_data_path}/{new_file}', f'{local_working_path}'], check=True)

    # Record that this name has been processed by touching an empty marker file.
    subprocess.run(["touch", f'{local_metadata_path}/{new_file}'], check=True)

    print(f"Moving to ready: {new_file}")
    try:
        subprocess.run(["mv", f'{local_working_path}/{new_file}', f'{local_data_path}'], check=True)
    except Exception:
        # The move failed, so drop the marker; the file will be retried on the next run.
        subprocess.run(["rm", f'{local_metadata_path}/{new_file}'], check=False)
        raise
    # Clean up anything the move left behind in the working dir.
    subprocess.run(["rm", "-rf", f'{local_working_path}/{new_file}'], check=True)
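# The per-file work is I/O-bound (rsync does the heavy lifting), so a small
# thread pool suffices; the 8-worker cap mainly limits how many concurrent
# ssh/rsync sessions hit the seedbox at once.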
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    future_to_new_files = {executor.submit(process_file, new_file): new_file for new_file in new_files}
    for future in concurrent.futures.as_completed(future_to_new_files):
        new_file = future_to_new_files[future]
        try:
            future.result()  # Raises if process_file failed.
            print(f"Processed {new_file}")
        except Exception as exc:
            print(f"{new_file} generated an exception: {exc}")