Renumbering files in a folder consecutively











up vote
4
down vote

favorite
1












I wrote a program to rename files with consecutive numbers, basically removing the gaps in the numbering. This could be a very annoying task to do it manually in windows. I only did this for practicing Python purposes.



Lets show what the program does in an example:



Say you have a folder E:Spam which contains the following files:




eggs010.txt
spam00.txt
spam002.txt
spam004.txt

spam005.txt
spam5.dat
spam7.txt
spam009.txt

spam037.txt
spam045.txt
spam089.txt

spam2000.txt




Lets say for what reason you want to continues numbe all the files from type
spam.txt



After running my program it gives out this:




eggs010.txt (unchanged wrong filename)
spam0001.txt

spam0002.txt
spam0003.txt
spam0004.txt

spam0005.txt
spam5.dat (unchanged wrong filetype)

spam0006.txt
spam0007.txt
spam0008.txt

spam0009.txt
spam0010.txt




Also an offset can be added to start the numbering for example at 5 or 0 instead of.




filling_gaps.py



"""
Filling_gaps.
Removes gaps in numbering of multiple files in a specified directory.

Takes a directory to search for files which match prefix and type and
otherwise only contain numbers in the middle.
Files are renamed continuesyly and filled up with leading zeros if
necessary
It is possible to define with which number the numeration
starts

e.g.
spam001.txt
spam003.txt
spam013.txt
spam1180.txt

would become (if start_number = 1):

spam0001.txt
spam0002.txt
spam0003.txt
spam0004.txt

To prevent overriting in the renaming process a temporary directory is
created during the renaming.
"""

import os
import sys
import shutil
import stat
from typing import List

def get_filenames(directory: str) -> List[str]:
"""
get filenames in supplied absolute directory name
"""
os.chdir(directory)
return os.listdir('.')


def get_digits_minimum_length(filenames: List[str], file_prefix: str,
file_type: str, start_number: int) -> int:
"""
loops over all provided filenames to find the filename with the
longest digit string e.g:
spam00001.txt -> len = 5
spam02.txt -> len = 2
spam0000003.txt -> len = 7
digits_length == 7
"""
digits_length: int = 0
for filename in filenames:
if filename.startswith(file_prefix) and filename.endswith(file_type):
number_string: str = filename.lstrip(file_prefix)
number_string = number_string.rstrip(file_type)
if number_string.isdigit():
current_length = len(number_string)
if current_length > digits_length:
digits_length = current_length

if len(str(start_number)) > digits_length:
digits_length = start_number

return digits_length


def make_filename(file_prefix: str, file_type: str, number: int,
number_length: int) -> str:
"""
Creates a new filename out of the parts.
"""
return file_prefix + str(number).zfill(number_length) + file_type


def make_folder(directory: str, folder_name: str):
"""
Creates a folder inside a directory
"""
new_directory: str = os.path.join(directory, folder_name)
os.makedirs(new_directory)
os.chmod(new_directory, stat.S_IWRITE)


def rename_and_move_file(directory: str, new_directory: str,
old_filename: str, new_filename: str):
"""
Renames a file from a specific directory into a new directory to
prevent that renamoving overrites other existing files
"""
shutil.move(os.path.join(directory, old_filename),
os.path.join(new_directory, new_filename))


def erase_directory(delete_directory: str):
"""
checks if directory exists and deletes it.
"""
if os.path.exists(delete_directory):
shutil.rmtree(delete_directory)


def extract_digits_and_move_to_temp(
directory: str, temp_directory: str, file_prefix: str, file_type: str,
digit_lenght: int):
"""
Remove pre und postfix from files, add leading zeros to the
integer part to reach the same length for all digitsand copy them
to temp directory
"""
for filename in get_filenames(directory):
if filename.startswith(file_prefix) and filename.endswith(file_type):
number_string: str = filename.lstrip(file_prefix)
number_string = number_string.rstrip(file_type)
if number_string.isdigit():
rename_and_move_file(directory, temp_directory, filename,
number_string.zfill(digit_lenght))


def renumber_and_move_to_directory(
start_number: int, directory: str, temp_directory: str,
file_prefix: str, file_type: str, digit_lenght: int):
"""
Renumbers the files according to start file number and moves them
back to directory
"""
next_number: int = start_number
filenames = get_filenames(temp_directory)
filenames.sort()
for filename in filenames:
new_filename: str = make_filename(
file_prefix, file_type, next_number, digit_lenght)
rename_and_move_file(temp_directory, directory, filename, new_filename)
next_number = next_number + 1


def filling_gaps(directory: str, file_prefix: str, file_type: str,
start_number: int = 1):
"""
Main logic of the filling gaps script.
First a temporary directory is made for the file operations.
Then the max len of digits (int + leading zeros) is calculated out
of the files.
Then all matched files are stripped everything but the int part and
they are moved into the temp directory.
In the temp directory they are numerated correctly again and moved
back to the source directory.
In the end the temporary directory is deleted
"""
if start_number < 0:
raise Exception("Start file number must be >=0. Value: " +
str(start_number))

temp_folder_name = "temp"
temp_directory = os.path.join(directory, temp_folder_name)

if os.path.exists(temp_directory):
print("Error: temp directory used for internal copy already exists."
" Make sure it does not. directory name:" + temp_folder_name)
sys.exit(0)

make_folder(directory, temp_folder_name)

digits_length: int = get_digits_minimum_length(
get_filenames(directory), file_prefix, file_type, start_number)

extract_digits_and_move_to_temp(
directory, temp_directory, file_prefix, file_type, digits_length)

renumber_and_move_to_directory(
start_number, directory, temp_directory, file_prefix, file_type,
digits_length)

erase_directory(temp_directory)


filling_gaps("E:\Spam", "spam", ".txt", 1)


I would like to know if this is good code?
Is the approach a good one?
Do you understand the code from the comments and the docstrings?
Are there any bad practices you can spot
Are there any improvements / better approaches to try?



Also I tested this script by manually adding the example script again and again how could this be better tested?



Please let me know what you think.










share|improve this question




























    up vote
    4
    down vote

    favorite
    1












    I wrote a program to rename files with consecutive numbers, basically removing the gaps in the numbering. This could be a very annoying task to do it manually in windows. I only did this for practicing Python purposes.



    Lets show what the program does in an example:



    Say you have a folder E:Spam which contains the following files:




    eggs010.txt
    spam00.txt
    spam002.txt
    spam004.txt

    spam005.txt
    spam5.dat
    spam7.txt
    spam009.txt

    spam037.txt
    spam045.txt
    spam089.txt

    spam2000.txt




    Lets say for what reason you want to continues numbe all the files from type
    spam.txt



    After running my program it gives out this:




    eggs010.txt (unchanged wrong filename)
    spam0001.txt

    spam0002.txt
    spam0003.txt
    spam0004.txt

    spam0005.txt
    spam5.dat (unchanged wrong filetype)

    spam0006.txt
    spam0007.txt
    spam0008.txt

    spam0009.txt
    spam0010.txt




    Also an offset can be added to start the numbering for example at 5 or 0 instead of.




    filling_gaps.py



    """
    Filling_gaps.
    Removes gaps in numbering of multiple files in a specified directory.

    Takes a directory to search for files which match prefix and type and
    otherwise only contain numbers in the middle.
    Files are renamed continuesyly and filled up with leading zeros if
    necessary
    It is possible to define with which number the numeration
    starts

    e.g.
    spam001.txt
    spam003.txt
    spam013.txt
    spam1180.txt

    would become (if start_number = 1):

    spam0001.txt
    spam0002.txt
    spam0003.txt
    spam0004.txt

    To prevent overriting in the renaming process a temporary directory is
    created during the renaming.
    """

    import os
    import sys
    import shutil
    import stat
    from typing import List

    def get_filenames(directory: str) -> List[str]:
    """
    get filenames in supplied absolute directory name
    """
    os.chdir(directory)
    return os.listdir('.')


    def get_digits_minimum_length(filenames: List[str], file_prefix: str,
    file_type: str, start_number: int) -> int:
    """
    loops over all provided filenames to find the filename with the
    longest digit string e.g:
    spam00001.txt -> len = 5
    spam02.txt -> len = 2
    spam0000003.txt -> len = 7
    digits_length == 7
    """
    digits_length: int = 0
    for filename in filenames:
    if filename.startswith(file_prefix) and filename.endswith(file_type):
    number_string: str = filename.lstrip(file_prefix)
    number_string = number_string.rstrip(file_type)
    if number_string.isdigit():
    current_length = len(number_string)
    if current_length > digits_length:
    digits_length = current_length

    if len(str(start_number)) > digits_length:
    digits_length = start_number

    return digits_length


    def make_filename(file_prefix: str, file_type: str, number: int,
    number_length: int) -> str:
    """
    Creates a new filename out of the parts.
    """
    return file_prefix + str(number).zfill(number_length) + file_type


    def make_folder(directory: str, folder_name: str):
    """
    Creates a folder inside a directory
    """
    new_directory: str = os.path.join(directory, folder_name)
    os.makedirs(new_directory)
    os.chmod(new_directory, stat.S_IWRITE)


    def rename_and_move_file(directory: str, new_directory: str,
    old_filename: str, new_filename: str):
    """
    Renames a file from a specific directory into a new directory to
    prevent that renamoving overrites other existing files
    """
    shutil.move(os.path.join(directory, old_filename),
    os.path.join(new_directory, new_filename))


    def erase_directory(delete_directory: str):
    """
    checks if directory exists and deletes it.
    """
    if os.path.exists(delete_directory):
    shutil.rmtree(delete_directory)


    def extract_digits_and_move_to_temp(
    directory: str, temp_directory: str, file_prefix: str, file_type: str,
    digit_lenght: int):
    """
    Remove pre und postfix from files, add leading zeros to the
    integer part to reach the same length for all digitsand copy them
    to temp directory
    """
    for filename in get_filenames(directory):
    if filename.startswith(file_prefix) and filename.endswith(file_type):
    number_string: str = filename.lstrip(file_prefix)
    number_string = number_string.rstrip(file_type)
    if number_string.isdigit():
    rename_and_move_file(directory, temp_directory, filename,
    number_string.zfill(digit_lenght))


    def renumber_and_move_to_directory(
    start_number: int, directory: str, temp_directory: str,
    file_prefix: str, file_type: str, digit_lenght: int):
    """
    Renumbers the files according to start file number and moves them
    back to directory
    """
    next_number: int = start_number
    filenames = get_filenames(temp_directory)
    filenames.sort()
    for filename in filenames:
    new_filename: str = make_filename(
    file_prefix, file_type, next_number, digit_lenght)
    rename_and_move_file(temp_directory, directory, filename, new_filename)
    next_number = next_number + 1


    def filling_gaps(directory: str, file_prefix: str, file_type: str,
    start_number: int = 1):
    """
    Main logic of the filling gaps script.
    First a temporary directory is made for the file operations.
    Then the max len of digits (int + leading zeros) is calculated out
    of the files.
    Then all matched files are stripped everything but the int part and
    they are moved into the temp directory.
    In the temp directory they are numerated correctly again and moved
    back to the source directory.
    In the end the temporary directory is deleted
    """
    if start_number < 0:
    raise Exception("Start file number must be >=0. Value: " +
    str(start_number))

    temp_folder_name = "temp"
    temp_directory = os.path.join(directory, temp_folder_name)

    if os.path.exists(temp_directory):
    print("Error: temp directory used for internal copy already exists."
    " Make sure it does not. directory name:" + temp_folder_name)
    sys.exit(0)

    make_folder(directory, temp_folder_name)

    digits_length: int = get_digits_minimum_length(
    get_filenames(directory), file_prefix, file_type, start_number)

    extract_digits_and_move_to_temp(
    directory, temp_directory, file_prefix, file_type, digits_length)

    renumber_and_move_to_directory(
    start_number, directory, temp_directory, file_prefix, file_type,
    digits_length)

    erase_directory(temp_directory)


    filling_gaps("E:\Spam", "spam", ".txt", 1)


    I would like to know if this is good code?
    Is the approach a good one?
    Do you understand the code from the comments and the docstrings?
    Are there any bad practices you can spot
    Are there any improvements / better approaches to try?



    Also I tested this script by manually adding the example script again and again how could this be better tested?



    Please let me know what you think.










    share|improve this question


























      up vote
      4
      down vote

      favorite
      1









      up vote
      4
      down vote

      favorite
      1






      1





      I wrote a program to rename files with consecutive numbers, basically removing the gaps in the numbering. This could be a very annoying task to do it manually in windows. I only did this for practicing Python purposes.



      Lets show what the program does in an example:



      Say you have a folder E:Spam which contains the following files:




      eggs010.txt
      spam00.txt
      spam002.txt
      spam004.txt

      spam005.txt
      spam5.dat
      spam7.txt
      spam009.txt

      spam037.txt
      spam045.txt
      spam089.txt

      spam2000.txt




      Lets say for what reason you want to continues numbe all the files from type
      spam.txt



      After running my program it gives out this:




      eggs010.txt (unchanged wrong filename)
      spam0001.txt

      spam0002.txt
      spam0003.txt
      spam0004.txt

      spam0005.txt
      spam5.dat (unchanged wrong filetype)

      spam0006.txt
      spam0007.txt
      spam0008.txt

      spam0009.txt
      spam0010.txt




      Also an offset can be added to start the numbering for example at 5 or 0 instead of.




      filling_gaps.py



      """
      Filling_gaps.
      Removes gaps in numbering of multiple files in a specified directory.

      Takes a directory to search for files which match prefix and type and
      otherwise only contain numbers in the middle.
      Files are renamed continuesyly and filled up with leading zeros if
      necessary
      It is possible to define with which number the numeration
      starts

      e.g.
      spam001.txt
      spam003.txt
      spam013.txt
      spam1180.txt

      would become (if start_number = 1):

      spam0001.txt
      spam0002.txt
      spam0003.txt
      spam0004.txt

      To prevent overriting in the renaming process a temporary directory is
      created during the renaming.
      """

      import os
      import sys
      import shutil
      import stat
      from typing import List

      def get_filenames(directory: str) -> List[str]:
      """
      get filenames in supplied absolute directory name
      """
      os.chdir(directory)
      return os.listdir('.')


      def get_digits_minimum_length(filenames: List[str], file_prefix: str,
      file_type: str, start_number: int) -> int:
      """
      loops over all provided filenames to find the filename with the
      longest digit string e.g:
      spam00001.txt -> len = 5
      spam02.txt -> len = 2
      spam0000003.txt -> len = 7
      digits_length == 7
      """
      digits_length: int = 0
      for filename in filenames:
      if filename.startswith(file_prefix) and filename.endswith(file_type):
      number_string: str = filename.lstrip(file_prefix)
      number_string = number_string.rstrip(file_type)
      if number_string.isdigit():
      current_length = len(number_string)
      if current_length > digits_length:
      digits_length = current_length

      if len(str(start_number)) > digits_length:
      digits_length = start_number

      return digits_length


      def make_filename(file_prefix: str, file_type: str, number: int,
      number_length: int) -> str:
      """
      Creates a new filename out of the parts.
      """
      return file_prefix + str(number).zfill(number_length) + file_type


      def make_folder(directory: str, folder_name: str):
      """
      Creates a folder inside a directory
      """
      new_directory: str = os.path.join(directory, folder_name)
      os.makedirs(new_directory)
      os.chmod(new_directory, stat.S_IWRITE)


      def rename_and_move_file(directory: str, new_directory: str,
      old_filename: str, new_filename: str):
      """
      Renames a file from a specific directory into a new directory to
      prevent that renamoving overrites other existing files
      """
      shutil.move(os.path.join(directory, old_filename),
      os.path.join(new_directory, new_filename))


      def erase_directory(delete_directory: str):
      """
      checks if directory exists and deletes it.
      """
      if os.path.exists(delete_directory):
      shutil.rmtree(delete_directory)


      def extract_digits_and_move_to_temp(
      directory: str, temp_directory: str, file_prefix: str, file_type: str,
      digit_lenght: int):
      """
      Remove pre und postfix from files, add leading zeros to the
      integer part to reach the same length for all digitsand copy them
      to temp directory
      """
      for filename in get_filenames(directory):
      if filename.startswith(file_prefix) and filename.endswith(file_type):
      number_string: str = filename.lstrip(file_prefix)
      number_string = number_string.rstrip(file_type)
      if number_string.isdigit():
      rename_and_move_file(directory, temp_directory, filename,
      number_string.zfill(digit_lenght))


      def renumber_and_move_to_directory(
      start_number: int, directory: str, temp_directory: str,
      file_prefix: str, file_type: str, digit_lenght: int):
      """
      Renumbers the files according to start file number and moves them
      back to directory
      """
      next_number: int = start_number
      filenames = get_filenames(temp_directory)
      filenames.sort()
      for filename in filenames:
      new_filename: str = make_filename(
      file_prefix, file_type, next_number, digit_lenght)
      rename_and_move_file(temp_directory, directory, filename, new_filename)
      next_number = next_number + 1


      def filling_gaps(directory: str, file_prefix: str, file_type: str,
      start_number: int = 1):
      """
      Main logic of the filling gaps script.
      First a temporary directory is made for the file operations.
      Then the max len of digits (int + leading zeros) is calculated out
      of the files.
      Then all matched files are stripped everything but the int part and
      they are moved into the temp directory.
      In the temp directory they are numerated correctly again and moved
      back to the source directory.
      In the end the temporary directory is deleted
      """
      if start_number < 0:
      raise Exception("Start file number must be >=0. Value: " +
      str(start_number))

      temp_folder_name = "temp"
      temp_directory = os.path.join(directory, temp_folder_name)

      if os.path.exists(temp_directory):
      print("Error: temp directory used for internal copy already exists."
      " Make sure it does not. directory name:" + temp_folder_name)
      sys.exit(0)

      make_folder(directory, temp_folder_name)

      digits_length: int = get_digits_minimum_length(
      get_filenames(directory), file_prefix, file_type, start_number)

      extract_digits_and_move_to_temp(
      directory, temp_directory, file_prefix, file_type, digits_length)

      renumber_and_move_to_directory(
      start_number, directory, temp_directory, file_prefix, file_type,
      digits_length)

      erase_directory(temp_directory)


      filling_gaps("E:\Spam", "spam", ".txt", 1)


      I would like to know if this is good code?
      Is the approach a good one?
      Do you understand the code from the comments and the docstrings?
      Are there any bad practices you can spot
      Are there any improvements / better approaches to try?



      Also I tested this script by manually adding the example script again and again how could this be better tested?



      Please let me know what you think.










      share|improve this question















      I wrote a program to rename files with consecutive numbers, basically removing the gaps in the numbering. This could be a very annoying task to do it manually in windows. I only did this for practicing Python purposes.



      Lets show what the program does in an example:



      Say you have a folder E:Spam which contains the following files:




      eggs010.txt
      spam00.txt
      spam002.txt
      spam004.txt

      spam005.txt
      spam5.dat
      spam7.txt
      spam009.txt

      spam037.txt
      spam045.txt
      spam089.txt

      spam2000.txt




      Lets say for what reason you want to continues numbe all the files from type
      spam.txt



      After running my program it gives out this:




      eggs010.txt (unchanged wrong filename)
      spam0001.txt

      spam0002.txt
      spam0003.txt
      spam0004.txt

      spam0005.txt
      spam5.dat (unchanged wrong filetype)

      spam0006.txt
      spam0007.txt
      spam0008.txt

      spam0009.txt
      spam0010.txt




      Also an offset can be added to start the numbering for example at 5 or 0 instead of.




      filling_gaps.py



      """
      Filling_gaps.
      Removes gaps in numbering of multiple files in a specified directory.

      Takes a directory to search for files which match prefix and type and
      otherwise only contain numbers in the middle.
      Files are renamed continuesyly and filled up with leading zeros if
      necessary
      It is possible to define with which number the numeration
      starts

      e.g.
      spam001.txt
      spam003.txt
      spam013.txt
      spam1180.txt

      would become (if start_number = 1):

      spam0001.txt
      spam0002.txt
      spam0003.txt
      spam0004.txt

      To prevent overriting in the renaming process a temporary directory is
      created during the renaming.
      """

      import os
      import sys
      import shutil
      import stat
      from typing import List

      def get_filenames(directory: str) -> List[str]:
      """
      get filenames in supplied absolute directory name
      """
      os.chdir(directory)
      return os.listdir('.')


      def get_digits_minimum_length(filenames: List[str], file_prefix: str,
      file_type: str, start_number: int) -> int:
      """
      loops over all provided filenames to find the filename with the
      longest digit string e.g:
      spam00001.txt -> len = 5
      spam02.txt -> len = 2
      spam0000003.txt -> len = 7
      digits_length == 7
      """
      digits_length: int = 0
      for filename in filenames:
      if filename.startswith(file_prefix) and filename.endswith(file_type):
      number_string: str = filename.lstrip(file_prefix)
      number_string = number_string.rstrip(file_type)
      if number_string.isdigit():
      current_length = len(number_string)
      if current_length > digits_length:
      digits_length = current_length

      if len(str(start_number)) > digits_length:
      digits_length = start_number

      return digits_length


      def make_filename(file_prefix: str, file_type: str, number: int,
      number_length: int) -> str:
      """
      Creates a new filename out of the parts.
      """
      return file_prefix + str(number).zfill(number_length) + file_type


      def make_folder(directory: str, folder_name: str):
      """
      Creates a folder inside a directory
      """
      new_directory: str = os.path.join(directory, folder_name)
      os.makedirs(new_directory)
      os.chmod(new_directory, stat.S_IWRITE)


      def rename_and_move_file(directory: str, new_directory: str,
      old_filename: str, new_filename: str):
      """
      Renames a file from a specific directory into a new directory to
      prevent that renamoving overrites other existing files
      """
      shutil.move(os.path.join(directory, old_filename),
      os.path.join(new_directory, new_filename))


      def erase_directory(delete_directory: str):
      """
      checks if directory exists and deletes it.
      """
      if os.path.exists(delete_directory):
      shutil.rmtree(delete_directory)


      def extract_digits_and_move_to_temp(
      directory: str, temp_directory: str, file_prefix: str, file_type: str,
      digit_lenght: int):
      """
      Remove pre und postfix from files, add leading zeros to the
      integer part to reach the same length for all digitsand copy them
      to temp directory
      """
      for filename in get_filenames(directory):
      if filename.startswith(file_prefix) and filename.endswith(file_type):
      number_string: str = filename.lstrip(file_prefix)
      number_string = number_string.rstrip(file_type)
      if number_string.isdigit():
      rename_and_move_file(directory, temp_directory, filename,
      number_string.zfill(digit_lenght))


      def renumber_and_move_to_directory(
      start_number: int, directory: str, temp_directory: str,
      file_prefix: str, file_type: str, digit_lenght: int):
      """
      Renumbers the files according to start file number and moves them
      back to directory
      """
      next_number: int = start_number
      filenames = get_filenames(temp_directory)
      filenames.sort()
      for filename in filenames:
      new_filename: str = make_filename(
      file_prefix, file_type, next_number, digit_lenght)
      rename_and_move_file(temp_directory, directory, filename, new_filename)
      next_number = next_number + 1


      def filling_gaps(directory: str, file_prefix: str, file_type: str,
      start_number: int = 1):
      """
      Main logic of the filling gaps script.
      First a temporary directory is made for the file operations.
      Then the max len of digits (int + leading zeros) is calculated out
      of the files.
      Then all matched files are stripped everything but the int part and
      they are moved into the temp directory.
      In the temp directory they are numerated correctly again and moved
      back to the source directory.
      In the end the temporary directory is deleted
      """
      if start_number < 0:
      raise Exception("Start file number must be >=0. Value: " +
      str(start_number))

      temp_folder_name = "temp"
      temp_directory = os.path.join(directory, temp_folder_name)

      if os.path.exists(temp_directory):
      print("Error: temp directory used for internal copy already exists."
      " Make sure it does not. directory name:" + temp_folder_name)
      sys.exit(0)

      make_folder(directory, temp_folder_name)

      digits_length: int = get_digits_minimum_length(
      get_filenames(directory), file_prefix, file_type, start_number)

      extract_digits_and_move_to_temp(
      directory, temp_directory, file_prefix, file_type, digits_length)

      renumber_and_move_to_directory(
      start_number, directory, temp_directory, file_prefix, file_type,
      digits_length)

      erase_directory(temp_directory)


      filling_gaps("E:\Spam", "spam", ".txt", 1)


      I would like to know if this is good code?
      Is the approach a good one?
      Do you understand the code from the comments and the docstrings?
      Are there any bad practices you can spot
      Are there any improvements / better approaches to try?



      Also I tested this script by manually adding the example script again and again how could this be better tested?



      Please let me know what you think.







      python beginner file-system






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited yesterday









      200_success

      127k15149412




      127k15149412










      asked yesterday









      Sandro4912

      772121




      772121






















          2 Answers
          2






          active

          oldest

          votes

















          up vote
          5
          down vote













          Bug



          You have a two bugs in get_digits_minimum_length():




          if len(str(start_number)) > digits_length:
          digits_length = start_number



          First of all, you probably meant digits_length = len(start_number). But even that is not enough, because the end number might have more digits than the starting number. Therefore, it should be:



          digits_length = max(digits_length, len(str(start_number + len(filenames) - 1)))


          File manipulation



          A better way to make a temporary directory within directory is tempfile.mkdtemp(dir=directory). It guarantees that the name of the new directory will not collide with any existing file or directory. (It automatically generates a different name as necessary to make that happen.)



          The temporary directory needs to have permissions stat.S_IRWXU. On Unix, stat.S_IWRITE (or stat.IWUSR) is insufficient, since you will not be able to call listdir() on the temporary directory.



          The program calls get_filenames() three times. Not only is that slow and wasteful, it also presents a possibility for inconsistencies due to race conditions, if a file gets created or removed while the program is running.



          shutil.move() is overkill, when os.rename() should work just as well. Also, it would be better to just do os.rmdir() rather than shutil.rmtree() to clean up the temporary directory, because os.rmdir() asserts that the directory is empty. (You wouldn't want to accidentally delete any of your files, would you? And if you did want to force a cleanup, with tempfile.TemporaryDirectory() as temporary_directory: … would be a more elegant way to write it.)



          Global state



          os.chdir() affects the global state of a process. I'd try to avoid doing it at all, if possible. And if you do do it, then I'd call it from a prominent place in your code, so that some innocuous-looking utility function (get_filenames()) does not have unexpected insidious side-effects on the rest of your code. After calling os.chdir(), you don't have to ever mention directory again in your code anymore, because every operation is relative to the current directory.



          Design and efficiency



          I am slightly annoyed by some of your very short helper functions: make_filename(), make_folder(), and erase_directory(). In my opinion, they make the code harder to read, because they add very little value over the standard library call, but I have to devote mental effort into keeping track of what they do. Each of those functions is called from just one place, which makes their existence even less worthwhile.



          I imagine that there might be use cases where this program is repeatedly executed for some directory. In that case, you would be moving files into and out of the temporary directory, most of them for naught. A better strategy would be to put more work into mapping the source filenames into their desired destinations (as in my filename_map() below), so that no more filesystem manipulation is done than necessary.



          Suggested solution



          import os
          import re
          import stat
          import tempfile

          def filename_map(prefix, suffix, start_number):
          """
          Make a dictionary that maps source filenames to their renumbered
          destination filename.
          """
          pattern = re.compile(re.escape(prefix) + '([0-9]+)' + re.escape(suffix), re.I)
          num_to_fn = {
          pattern.fullmatch(fn).group(1): fn
          for fn in os.listdir()
          if pattern.fullmatch(fn)
          }
          digits_length = max(
          max((len(num) for num in num_to_fn), default=0),
          len(str(start_number + len(num_to_fn) - 1))
          )
          return {
          num_to_fn[num]: prefix + str(i).zfill(digits_length) + suffix
          for i, num in enumerate(sorted(num_to_fn, key=int), start_number)
          }

          def filling_gaps(prefix, suffix, start_number=1):
          """
          Rename files in the current directory whose names consist of the given
          prefix, followed by some digits, and the given suffix, such that they
          are consecutively numbered from the specified start_number.
          """
          if start_number < 0:
          raise ArgumentError("Start number {0} is less than 0", start_number)

          fn_map = {
          src_fn: dst_fn
          for src_fn, dst_fn in filename_map(prefix, suffix, start_number).items()
          if src_fn != dst_fn
          }
          if not fn_map:
          return # Nothing to rename

          temp_directory = tempfile.mkdtemp(dir='.', prefix='renumber')
          os.chmod(temp_directory, stat.S_IRWXU)
          for src_fn, dst_fn in fn_map.items():
          os.rename(src_fn, os.path.join(temp_directory, dst_fn))
          for dst_fn in fn_map.values():
          os.rename(os.path.join(temp_directory, dst_fn), dst_fn)
          os.rmdir(temp_directory)

          os.chdir(r'E:spam')
          filling_gaps('spam', '.txt', 1)





          share|improve this answer






























            up vote
            1
            down vote













            Indenting



            I don't like this style



            def get_digits_minimum_length(filenames: List[str], file_prefix: str,
            file_type: str, start_number: int) -> int:


            I'm more a fan of



            def get_digits_minimum_length(
            filenames: List[str], file_prefix: str, file_type: str, start_number: int
            ) -> int:


            ...



            or



            def get_digits_minimum_length(
            filenames: List[str],
            file_prefix: str,
            file_type: str,
            start_number: int,
            ) -> int:
            ...


            But since recently, I use a code formatter (black) to do this



            Pathlib.Path



            Using pathlib.Path instead of os can make things a lot easier. It has builtin globbing, and you can easily generate the new name



            separate the program



            All in all this is a rather simple program:




            1. Find the files who are structured "{prefix}{number}.{suffix}"

            2. Extract number

            3. Generate a new number

            4. Generate the new name

            5. Move the files


            Your program makes a few strange splits between the functions.



            If you need to do this via a temporary directory, use tempfile.TemporaryDirectory as a context manager



            All in all this can be done quite simple



            def find_files(directory, prefix, suffix):
            """find the files in `directory` wich start with `prefix` and end with `.suffix`"""
            directory = Path(directory)

            filename_pattern = re.compile(
            re.escape(prefix) + "(?P<number>[0-9]+)." + re.escape(suffix), re.I
            )
            for file in directory.glob(f"{prefix}*.{suffix}"):
            match = filename_pattern.search(file.name)
            if not match:
            continue
            yield int(match.group("number")), file


            This generator yield the number (as int) and the filename (as Path) of all the files that comply with the prefix and suffix



            def rename_files(files, prefix, suffix, start_num=0):
            """Generates the new filename"""
            files = sorted(files)
            last_file_num = files[-1][0]
            max_num = max(last_file_num, start_num + len(files))
            num_length = len(str(max_num))
            for new_number, (_, file) in enumerate((files), start_num):
            new_name = f"{prefix}{str(new_number).zfill(num_length)}.{suffix}"
            yield file, new_name


            This one yields the file and the new proposed name. To calculate how long the number needs to be, it takes into account the lingest existing number and the starting number + amount of files to rename.



            It also keeps the existing order, ordered via the number, not lexigraphically, so spam1.txt comes before spam02.txt



            the method to move the files takes this generator as input, and does the moving on the fly:



            def move(renames, practice=True):
            for file, new_name in renames:
            new_file = file.with_name(new_name)
            print(f"renaming {file.name} to {new_name}")
            if not practice:
            os.rename(file, new_file)


            This was tested with



            if __name__ == "__main__":
            prefix = "spam"
            suffix = "txt"
            files = find_files(DATA_DIR, prefix, suffix)
            renames = rename_files(files, prefix, suffix)
            move(renames, practice=False)



            renaming spam00.txt to spam0000.txt
            renaming spam002.txt to spam0001.txt
            renaming spam004.txt to spam0002.txt
            renaming spam005.txt to spam0003.txt
            renaming spam7.txt to spam0004.txt
            renaming spam009.txt to spam0005.txt
            renaming spam037.txt to spam0006.txt
            renaming spam045.txt to spam0007.txt
            renaming spam089.txt to spam0008.txt
            renaming spam2000.txt to spam0009.txt



            I tested this with:



            DATA_DIR = Path("data/test")
            testfiles = """eggs010.txt
            spam00.txt
            spam002.txt
            spam004.txt
            spam005.txt
            spam5.dat
            spam7.txt
            spam009.txt
            spam037.txt
            spam045.txt
            spam089.txt
            spam2000.txt"""

            for filename in testfiles.split("n"):
            file = DATA_DIR / filename.strip()
            file.touch()





            share|improve this answer





















              Your Answer





              StackExchange.ifUsing("editor", function () {
              return StackExchange.using("mathjaxEditing", function () {
              StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
              StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
              });
              });
              }, "mathjax-editing");

              StackExchange.ifUsing("editor", function () {
              StackExchange.using("externalEditor", function () {
              StackExchange.using("snippets", function () {
              StackExchange.snippets.init();
              });
              });
              }, "code-snippets");

              StackExchange.ready(function() {
              var channelOptions = {
              tags: "".split(" "),
              id: "196"
              };
              initTagRenderer("".split(" "), "".split(" "), channelOptions);

              StackExchange.using("externalEditor", function() {
              // Have to fire editor after snippets, if snippets enabled
              if (StackExchange.settings.snippets.snippetsEnabled) {
              StackExchange.using("snippets", function() {
              createEditor();
              });
              }
              else {
              createEditor();
              }
              });

              function createEditor() {
              StackExchange.prepareEditor({
              heartbeatType: 'answer',
              convertImagesToLinks: false,
              noModals: true,
              showLowRepImageUploadWarning: true,
              reputationToPostImages: null,
              bindNavPrevention: true,
              postfix: "",
              imageUploader: {
              brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
              contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
              allowUrls: true
              },
              onDemand: true,
              discardSelector: ".discard-answer"
              ,immediatelyShowMarkdownHelp:true
              });


              }
              });














              draft saved

              draft discarded


















              StackExchange.ready(
              function () {
              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209173%2frenumbering-files-in-a-folder-consecutively%23new-answer', 'question_page');
              }
              );

              Post as a guest















              Required, but never shown

























              2 Answers
              2






              active

              oldest

              votes








              2 Answers
              2






              active

              oldest

              votes









              active

              oldest

              votes






              active

              oldest

              votes








              up vote
              5
              down vote













              Bug



              You have a two bugs in get_digits_minimum_length():




              if len(str(start_number)) > digits_length:
              digits_length = start_number



              First of all, you probably meant digits_length = len(start_number). But even that is not enough, because the end number might have more digits than the starting number. Therefore, it should be:



              digits_length = max(digits_length, len(str(start_number + len(filenames) - 1)))


              File manipulation



              A better way to make a temporary directory within directory is tempfile.mkdtemp(dir=directory). It guarantees that the name of the new directory will not collide with any existing file or directory. (It automatically generates a different name as necessary to make that happen.)



              The temporary directory needs to have permissions stat.S_IRWXU. On Unix, stat.S_IWRITE (or stat.IWUSR) is insufficient, since you will not be able to call listdir() on the temporary directory.



              The program calls get_filenames() three times. Not only is that slow and wasteful, it also presents a possibility for inconsistencies due to race conditions, if a file gets created or removed while the program is running.



              shutil.move() is overkill, when os.rename() should work just as well. Also, it would be better to just do os.rmdir() rather than shutil.rmtree() to clean up the temporary directory, because os.rmdir() asserts that the directory is empty. (You wouldn't want to accidentally delete any of your files, would you? And if you did want to force a cleanup, with tempfile.TemporaryDirectory() as temporary_directory: … would be a more elegant way to write it.)



              Global state



              os.chdir() affects the global state of a process. I'd try to avoid doing it at all, if possible. And if you do do it, then I'd call it from a prominent place in your code, so that some innocuous-looking utility function (get_filenames()) does not have unexpected insidious side-effects on the rest of your code. After calling os.chdir(), you don't have to ever mention directory again in your code anymore, because every operation is relative to the current directory.



              Design and efficiency



              I am slightly annoyed by some of your very short helper functions: make_filename(), make_folder(), and erase_directory(). In my opinion, they make the code harder to read, because they add very little value over the standard library call, but I have to devote mental effort into keeping track of what they do. Each of those functions is called from just one place, which makes their existence even less worthwhile.



              I imagine that there might be use cases where this program is repeatedly executed for some directory. In that case, you would be moving files into and out of the temporary directory, most of them for naught. A better strategy would be to put more work into mapping the source filenames into their desired destinations (as in my filename_map() below), so that no more filesystem manipulation is done than necessary.



              Suggested solution



              import os
              import re
              import stat
              import tempfile

              def filename_map(prefix, suffix, start_number):
              """
              Make a dictionary that maps source filenames to their renumbered
              destination filename.
              """
              pattern = re.compile(re.escape(prefix) + '([0-9]+)' + re.escape(suffix), re.I)
              num_to_fn = {
              pattern.fullmatch(fn).group(1): fn
              for fn in os.listdir()
              if pattern.fullmatch(fn)
              }
              digits_length = max(
              max((len(num) for num in num_to_fn), default=0),
              len(str(start_number + len(num_to_fn) - 1))
              )
              return {
              num_to_fn[num]: prefix + str(i).zfill(digits_length) + suffix
              for i, num in enumerate(sorted(num_to_fn, key=int), start_number)
              }

              def filling_gaps(prefix, suffix, start_number=1):
              """
              Rename files in the current directory whose names consist of the given
              prefix, followed by some digits, and the given suffix, such that they
              are consecutively numbered from the specified start_number.
              """
              if start_number < 0:
              raise ArgumentError("Start number {0} is less than 0", start_number)

              fn_map = {
              src_fn: dst_fn
              for src_fn, dst_fn in filename_map(prefix, suffix, start_number).items()
              if src_fn != dst_fn
              }
              if not fn_map:
              return # Nothing to rename

              temp_directory = tempfile.mkdtemp(dir='.', prefix='renumber')
              os.chmod(temp_directory, stat.S_IRWXU)
              for src_fn, dst_fn in fn_map.items():
              os.rename(src_fn, os.path.join(temp_directory, dst_fn))
              for dst_fn in fn_map.values():
              os.rename(os.path.join(temp_directory, dst_fn), dst_fn)
              os.rmdir(temp_directory)

              os.chdir(r'E:spam')
              filling_gaps('spam', '.txt', 1)





              share|improve this answer



























                up vote
                5
                down vote













                Bug



                You have a two bugs in get_digits_minimum_length():




                if len(str(start_number)) > digits_length:
                digits_length = start_number



                First of all, you probably meant digits_length = len(start_number). But even that is not enough, because the end number might have more digits than the starting number. Therefore, it should be:



                digits_length = max(digits_length, len(str(start_number + len(filenames) - 1)))


                File manipulation



                A better way to make a temporary directory within directory is tempfile.mkdtemp(dir=directory). It guarantees that the name of the new directory will not collide with any existing file or directory. (It automatically generates a different name as necessary to make that happen.)



                The temporary directory needs to have permissions stat.S_IRWXU. On Unix, stat.S_IWRITE (or stat.IWUSR) is insufficient, since you will not be able to call listdir() on the temporary directory.



                The program calls get_filenames() three times. Not only is that slow and wasteful, it also presents a possibility for inconsistencies due to race conditions, if a file gets created or removed while the program is running.



                shutil.move() is overkill, when os.rename() should work just as well. Also, it would be better to just do os.rmdir() rather than shutil.rmtree() to clean up the temporary directory, because os.rmdir() asserts that the directory is empty. (You wouldn't want to accidentally delete any of your files, would you? And if you did want to force a cleanup, with tempfile.TemporaryDirectory() as temporary_directory: … would be a more elegant way to write it.)



                Global state



                os.chdir() affects the global state of a process. I'd try to avoid doing it at all, if possible. And if you do do it, then I'd call it from a prominent place in your code, so that some innocuous-looking utility function (get_filenames()) does not have unexpected insidious side-effects on the rest of your code. After calling os.chdir(), you don't have to ever mention directory again in your code anymore, because every operation is relative to the current directory.



                Design and efficiency



                I am slightly annoyed by some of your very short helper functions: make_filename(), make_folder(), and erase_directory(). In my opinion, they make the code harder to read, because they add very little value over the standard library call, but I have to devote mental effort into keeping track of what they do. Each of those functions is called from just one place, which makes their existence even less worthwhile.



                I imagine that there might be use cases where this program is repeatedly executed for some directory. In that case, you would be moving files into and out of the temporary directory, most of them for naught. A better strategy would be to put more work into mapping the source filenames into their desired destinations (as in my filename_map() below), so that no more filesystem manipulation is done than necessary.



                Suggested solution



                import os
                import re
                import stat
                import tempfile

                def filename_map(prefix, suffix, start_number):
                """
                Make a dictionary that maps source filenames to their renumbered
                destination filename.
                """
                pattern = re.compile(re.escape(prefix) + '([0-9]+)' + re.escape(suffix), re.I)
                num_to_fn = {
                pattern.fullmatch(fn).group(1): fn
                for fn in os.listdir()
                if pattern.fullmatch(fn)
                }
                digits_length = max(
                max((len(num) for num in num_to_fn), default=0),
                len(str(start_number + len(num_to_fn) - 1))
                )
                return {
                num_to_fn[num]: prefix + str(i).zfill(digits_length) + suffix
                for i, num in enumerate(sorted(num_to_fn, key=int), start_number)
                }

                def filling_gaps(prefix, suffix, start_number=1):
                """
                Rename files in the current directory whose names consist of the given
                prefix, followed by some digits, and the given suffix, such that they
                are consecutively numbered from the specified start_number.
                """
                if start_number < 0:
                raise ArgumentError("Start number {0} is less than 0", start_number)

                fn_map = {
                src_fn: dst_fn
                for src_fn, dst_fn in filename_map(prefix, suffix, start_number).items()
                if src_fn != dst_fn
                }
                if not fn_map:
                return # Nothing to rename

                temp_directory = tempfile.mkdtemp(dir='.', prefix='renumber')
                os.chmod(temp_directory, stat.S_IRWXU)
                for src_fn, dst_fn in fn_map.items():
                os.rename(src_fn, os.path.join(temp_directory, dst_fn))
                for dst_fn in fn_map.values():
                os.rename(os.path.join(temp_directory, dst_fn), dst_fn)
                os.rmdir(temp_directory)

                os.chdir(r'E:spam')
                filling_gaps('spam', '.txt', 1)





                share|improve this answer

























                  up vote
                  5
                  down vote










                  up vote
                  5
                  down vote









                  Bug



                  You have a two bugs in get_digits_minimum_length():




                  if len(str(start_number)) > digits_length:
                  digits_length = start_number



                  First of all, you probably meant digits_length = len(start_number). But even that is not enough, because the end number might have more digits than the starting number. Therefore, it should be:



                  digits_length = max(digits_length, len(str(start_number + len(filenames) - 1)))


                  File manipulation



                  A better way to make a temporary directory within directory is tempfile.mkdtemp(dir=directory). It guarantees that the name of the new directory will not collide with any existing file or directory. (It automatically generates a different name as necessary to make that happen.)



                  The temporary directory needs to have permissions stat.S_IRWXU. On Unix, stat.S_IWRITE (or stat.IWUSR) is insufficient, since you will not be able to call listdir() on the temporary directory.



                  The program calls get_filenames() three times. Not only is that slow and wasteful, it also presents a possibility for inconsistencies due to race conditions, if a file gets created or removed while the program is running.



                  shutil.move() is overkill, when os.rename() should work just as well. Also, it would be better to just do os.rmdir() rather than shutil.rmtree() to clean up the temporary directory, because os.rmdir() asserts that the directory is empty. (You wouldn't want to accidentally delete any of your files, would you? And if you did want to force a cleanup, with tempfile.TemporaryDirectory() as temporary_directory: … would be a more elegant way to write it.)



                  Global state



                  os.chdir() affects the global state of a process. I'd try to avoid doing it at all, if possible. And if you do do it, then I'd call it from a prominent place in your code, so that some innocuous-looking utility function (get_filenames()) does not have unexpected insidious side-effects on the rest of your code. After calling os.chdir(), you don't have to ever mention directory again in your code anymore, because every operation is relative to the current directory.



                  Design and efficiency



                  I am slightly annoyed by some of your very short helper functions: make_filename(), make_folder(), and erase_directory(). In my opinion, they make the code harder to read, because they add very little value over the standard library call, but I have to devote mental effort into keeping track of what they do. Each of those functions is called from just one place, which makes their existence even less worthwhile.



                  I imagine that there might be use cases where this program is repeatedly executed for some directory. In that case, you would be moving files into and out of the temporary directory, most of them for naught. A better strategy would be to put more work into mapping the source filenames into their desired destinations (as in my filename_map() below), so that no more filesystem manipulation is done than necessary.



                  Suggested solution



                  import os
                  import re
                  import stat
                  import tempfile

                  def filename_map(prefix, suffix, start_number):
                  """
                  Make a dictionary that maps source filenames to their renumbered
                  destination filename.
                  """
                  pattern = re.compile(re.escape(prefix) + '([0-9]+)' + re.escape(suffix), re.I)
                  num_to_fn = {
                  pattern.fullmatch(fn).group(1): fn
                  for fn in os.listdir()
                  if pattern.fullmatch(fn)
                  }
                  digits_length = max(
                  max((len(num) for num in num_to_fn), default=0),
                  len(str(start_number + len(num_to_fn) - 1))
                  )
                  return {
                  num_to_fn[num]: prefix + str(i).zfill(digits_length) + suffix
                  for i, num in enumerate(sorted(num_to_fn, key=int), start_number)
                  }

                  def filling_gaps(prefix, suffix, start_number=1):
                  """
                  Rename files in the current directory whose names consist of the given
                  prefix, followed by some digits, and the given suffix, such that they
                  are consecutively numbered from the specified start_number.
                  """
                  if start_number < 0:
                  raise ArgumentError("Start number {0} is less than 0", start_number)

                  fn_map = {
                  src_fn: dst_fn
                  for src_fn, dst_fn in filename_map(prefix, suffix, start_number).items()
                  if src_fn != dst_fn
                  }
                  if not fn_map:
                  return # Nothing to rename

                  temp_directory = tempfile.mkdtemp(dir='.', prefix='renumber')
                  os.chmod(temp_directory, stat.S_IRWXU)
                  for src_fn, dst_fn in fn_map.items():
                  os.rename(src_fn, os.path.join(temp_directory, dst_fn))
                  for dst_fn in fn_map.values():
                  os.rename(os.path.join(temp_directory, dst_fn), dst_fn)
                  os.rmdir(temp_directory)

                  os.chdir(r'E:spam')
                  filling_gaps('spam', '.txt', 1)





                  share|improve this answer














                  Bug



                  You have a two bugs in get_digits_minimum_length():




                  if len(str(start_number)) > digits_length:
                  digits_length = start_number



                  First of all, you probably meant digits_length = len(start_number). But even that is not enough, because the end number might have more digits than the starting number. Therefore, it should be:



                  digits_length = max(digits_length, len(str(start_number + len(filenames) - 1)))


                  File manipulation



                  A better way to make a temporary directory within directory is tempfile.mkdtemp(dir=directory). It guarantees that the name of the new directory will not collide with any existing file or directory. (It automatically generates a different name as necessary to make that happen.)



                  The temporary directory needs to have permissions stat.S_IRWXU. On Unix, stat.S_IWRITE (or stat.IWUSR) is insufficient, since you will not be able to call listdir() on the temporary directory.



                  The program calls get_filenames() three times. Not only is that slow and wasteful, it also presents a possibility for inconsistencies due to race conditions, if a file gets created or removed while the program is running.



                  shutil.move() is overkill, when os.rename() should work just as well. Also, it would be better to just do os.rmdir() rather than shutil.rmtree() to clean up the temporary directory, because os.rmdir() asserts that the directory is empty. (You wouldn't want to accidentally delete any of your files, would you? And if you did want to force a cleanup, with tempfile.TemporaryDirectory() as temporary_directory: … would be a more elegant way to write it.)



                  Global state



                  os.chdir() affects the global state of a process. I'd try to avoid doing it at all, if possible. And if you do do it, then I'd call it from a prominent place in your code, so that some innocuous-looking utility function (get_filenames()) does not have unexpected insidious side-effects on the rest of your code. After calling os.chdir(), you don't have to ever mention directory again in your code anymore, because every operation is relative to the current directory.



                  Design and efficiency



                  I am slightly annoyed by some of your very short helper functions: make_filename(), make_folder(), and erase_directory(). In my opinion, they make the code harder to read, because they add very little value over the standard library call, but I have to devote mental effort into keeping track of what they do. Each of those functions is called from just one place, which makes their existence even less worthwhile.



                  I imagine that there might be use cases where this program is repeatedly executed for some directory. In that case, you would be moving files into and out of the temporary directory, most of them for naught. A better strategy would be to put more work into mapping the source filenames into their desired destinations (as in my filename_map() below), so that no more filesystem manipulation is done than necessary.



                  Suggested solution



                  import os
                  import re
                  import stat
                  import tempfile

                  def filename_map(prefix, suffix, start_number):
                  """
                  Make a dictionary that maps source filenames to their renumbered
                  destination filename.
                  """
                  pattern = re.compile(re.escape(prefix) + '([0-9]+)' + re.escape(suffix), re.I)
                  num_to_fn = {
                  pattern.fullmatch(fn).group(1): fn
                  for fn in os.listdir()
                  if pattern.fullmatch(fn)
                  }
                  digits_length = max(
                  max((len(num) for num in num_to_fn), default=0),
                  len(str(start_number + len(num_to_fn) - 1))
                  )
                  return {
                  num_to_fn[num]: prefix + str(i).zfill(digits_length) + suffix
                  for i, num in enumerate(sorted(num_to_fn, key=int), start_number)
                  }

                  def filling_gaps(prefix, suffix, start_number=1):
                  """
                  Rename files in the current directory whose names consist of the given
                  prefix, followed by some digits, and the given suffix, such that they
                  are consecutively numbered from the specified start_number.
                  """
                  if start_number < 0:
                  raise ArgumentError("Start number {0} is less than 0", start_number)

                  fn_map = {
                  src_fn: dst_fn
                  for src_fn, dst_fn in filename_map(prefix, suffix, start_number).items()
                  if src_fn != dst_fn
                  }
                  if not fn_map:
                  return # Nothing to rename

                  temp_directory = tempfile.mkdtemp(dir='.', prefix='renumber')
                  os.chmod(temp_directory, stat.S_IRWXU)
                  for src_fn, dst_fn in fn_map.items():
                  os.rename(src_fn, os.path.join(temp_directory, dst_fn))
                  for dst_fn in fn_map.values():
                  os.rename(os.path.join(temp_directory, dst_fn), dst_fn)
                  os.rmdir(temp_directory)

                  os.chdir(r'E:spam')
                  filling_gaps('spam', '.txt', 1)






                  share|improve this answer














                  share|improve this answer



                  share|improve this answer








                  edited 10 hours ago

























                  answered yesterday









                  200_success

                  127k15149412




                  127k15149412
























                      up vote
                      1
                      down vote













                      Indenting



                      I don't like this style



                      def get_digits_minimum_length(filenames: List[str], file_prefix: str,
                      file_type: str, start_number: int) -> int:


                      I'm more a fan of



                      def get_digits_minimum_length(
                      filenames: List[str], file_prefix: str, file_type: str, start_number: int
                      ) -> int:


                      ...



                      or



                      def get_digits_minimum_length(
                      filenames: List[str],
                      file_prefix: str,
                      file_type: str,
                      start_number: int,
                      ) -> int:
                      ...


                      But since recently, I use a code formatter (black) to do this



                      Pathlib.Path



                      Using pathlib.Path instead of os can make things a lot easier. It has builtin globbing, and you can easily generate the new name



                      separate the program



                      All in all this is a rather simple program:




                      1. Find the files who are structured "{prefix}{number}.{suffix}"

                      2. Extract number

                      3. Generate a new number

                      4. Generate the new name

                      5. Move the files


                      Your program makes a few strange splits between the functions.



                      If you need to do this via a temporary directory, use tempfile.TemporaryDirectory as a context manager



                      All in all this can be done quite simple



                      def find_files(directory, prefix, suffix):
                      """find the files in `directory` wich start with `prefix` and end with `.suffix`"""
                      directory = Path(directory)

                      filename_pattern = re.compile(
                      re.escape(prefix) + "(?P<number>[0-9]+)." + re.escape(suffix), re.I
                      )
                      for file in directory.glob(f"{prefix}*.{suffix}"):
                      match = filename_pattern.search(file.name)
                      if not match:
                      continue
                      yield int(match.group("number")), file


                      This generator yield the number (as int) and the filename (as Path) of all the files that comply with the prefix and suffix



                      def rename_files(files, prefix, suffix, start_num=0):
                      """Generates the new filename"""
                      files = sorted(files)
                      last_file_num = files[-1][0]
                      max_num = max(last_file_num, start_num + len(files))
                      num_length = len(str(max_num))
                      for new_number, (_, file) in enumerate((files), start_num):
                      new_name = f"{prefix}{str(new_number).zfill(num_length)}.{suffix}"
                      yield file, new_name


                      This one yields the file and the new proposed name. To calculate how long the number needs to be, it takes into account the lingest existing number and the starting number + amount of files to rename.



                      It also keeps the existing order, ordered via the number, not lexigraphically, so spam1.txt comes before spam02.txt



                      the method to move the files takes this generator as input, and does the moving on the fly:



                      def move(renames, practice=True):
                      for file, new_name in renames:
                      new_file = file.with_name(new_name)
                      print(f"renaming {file.name} to {new_name}")
                      if not practice:
                      os.rename(file, new_file)


                      This was tested with



                      if __name__ == "__main__":
                      prefix = "spam"
                      suffix = "txt"
                      files = find_files(DATA_DIR, prefix, suffix)
                      renames = rename_files(files, prefix, suffix)
                      move(renames, practice=False)



                      renaming spam00.txt to spam0000.txt
                      renaming spam002.txt to spam0001.txt
                      renaming spam004.txt to spam0002.txt
                      renaming spam005.txt to spam0003.txt
                      renaming spam7.txt to spam0004.txt
                      renaming spam009.txt to spam0005.txt
                      renaming spam037.txt to spam0006.txt
                      renaming spam045.txt to spam0007.txt
                      renaming spam089.txt to spam0008.txt
                      renaming spam2000.txt to spam0009.txt



                      I tested this with:



                      DATA_DIR = Path("data/test")
                      testfiles = """eggs010.txt
                      spam00.txt
                      spam002.txt
                      spam004.txt
                      spam005.txt
                      spam5.dat
                      spam7.txt
                      spam009.txt
                      spam037.txt
                      spam045.txt
                      spam089.txt
                      spam2000.txt"""

                      for filename in testfiles.split("n"):
                      file = DATA_DIR / filename.strip()
                      file.touch()





                      share|improve this answer

























                        up vote
                        1
                        down vote













                        Indenting



                        I don't like this style



                        def get_digits_minimum_length(filenames: List[str], file_prefix: str,
                        file_type: str, start_number: int) -> int:


                        I'm more a fan of



                        def get_digits_minimum_length(
                        filenames: List[str], file_prefix: str, file_type: str, start_number: int
                        ) -> int:


                        ...



                        or



                        def get_digits_minimum_length(
                        filenames: List[str],
                        file_prefix: str,
                        file_type: str,
                        start_number: int,
                        ) -> int:
                        ...


                        But since recently, I use a code formatter (black) to do this



                        Pathlib.Path



                        Using pathlib.Path instead of os can make things a lot easier. It has builtin globbing, and you can easily generate the new name



                        separate the program



                        All in all this is a rather simple program:




                        1. Find the files who are structured "{prefix}{number}.{suffix}"

                        2. Extract number

                        3. Generate a new number

                        4. Generate the new name

                        5. Move the files


                        Your program makes a few strange splits between the functions.



                        If you need to do this via a temporary directory, use tempfile.TemporaryDirectory as a context manager



                        All in all this can be done quite simple



                        def find_files(directory, prefix, suffix):
                        """find the files in `directory` wich start with `prefix` and end with `.suffix`"""
                        directory = Path(directory)

                        filename_pattern = re.compile(
                        re.escape(prefix) + "(?P<number>[0-9]+)." + re.escape(suffix), re.I
                        )
                        for file in directory.glob(f"{prefix}*.{suffix}"):
                        match = filename_pattern.search(file.name)
                        if not match:
                        continue
                        yield int(match.group("number")), file


                        This generator yield the number (as int) and the filename (as Path) of all the files that comply with the prefix and suffix



                        def rename_files(files, prefix, suffix, start_num=0):
                        """Generates the new filename"""
                        files = sorted(files)
                        last_file_num = files[-1][0]
                        max_num = max(last_file_num, start_num + len(files))
                        num_length = len(str(max_num))
                        for new_number, (_, file) in enumerate((files), start_num):
                        new_name = f"{prefix}{str(new_number).zfill(num_length)}.{suffix}"
                        yield file, new_name


                        This one yields the file and the new proposed name. To calculate how long the number needs to be, it takes into account the lingest existing number and the starting number + amount of files to rename.



                        It also keeps the existing order, ordered via the number, not lexigraphically, so spam1.txt comes before spam02.txt



                        the method to move the files takes this generator as input, and does the moving on the fly:



                        def move(renames, practice=True):
                        for file, new_name in renames:
                        new_file = file.with_name(new_name)
                        print(f"renaming {file.name} to {new_name}")
                        if not practice:
                        os.rename(file, new_file)


                        This was tested with



                        if __name__ == "__main__":
                        prefix = "spam"
                        suffix = "txt"
                        files = find_files(DATA_DIR, prefix, suffix)
                        renames = rename_files(files, prefix, suffix)
                        move(renames, practice=False)



                        renaming spam00.txt to spam0000.txt
                        renaming spam002.txt to spam0001.txt
                        renaming spam004.txt to spam0002.txt
                        renaming spam005.txt to spam0003.txt
                        renaming spam7.txt to spam0004.txt
                        renaming spam009.txt to spam0005.txt
                        renaming spam037.txt to spam0006.txt
                        renaming spam045.txt to spam0007.txt
                        renaming spam089.txt to spam0008.txt
                        renaming spam2000.txt to spam0009.txt



                        I tested this with:



                        DATA_DIR = Path("data/test")
                        testfiles = """eggs010.txt
                        spam00.txt
                        spam002.txt
                        spam004.txt
                        spam005.txt
                        spam5.dat
                        spam7.txt
                        spam009.txt
                        spam037.txt
                        spam045.txt
                        spam089.txt
                        spam2000.txt"""

                        for filename in testfiles.split("n"):
                        file = DATA_DIR / filename.strip()
                        file.touch()





                        share|improve this answer























                          up vote
                          1
                          down vote










                          up vote
                          1
                          down vote









                          Indenting



                          I don't like this style



                          def get_digits_minimum_length(filenames: List[str], file_prefix: str,
                          file_type: str, start_number: int) -> int:


                          I'm more a fan of



                          def get_digits_minimum_length(
                          filenames: List[str], file_prefix: str, file_type: str, start_number: int
                          ) -> int:


                          ...



                          or



                          def get_digits_minimum_length(
                          filenames: List[str],
                          file_prefix: str,
                          file_type: str,
                          start_number: int,
                          ) -> int:
                          ...


                          But since recently, I use a code formatter (black) to do this



                          Pathlib.Path



                          Using pathlib.Path instead of os can make things a lot easier. It has builtin globbing, and you can easily generate the new name



                          separate the program



                          All in all this is a rather simple program:




                          1. Find the files who are structured "{prefix}{number}.{suffix}"

                          2. Extract number

                          3. Generate a new number

                          4. Generate the new name

                          5. Move the files


                          Your program makes a few strange splits between the functions.



                          If you need to do this via a temporary directory, use tempfile.TemporaryDirectory as a context manager



                          All in all this can be done quite simple



                          def find_files(directory, prefix, suffix):
                          """find the files in `directory` wich start with `prefix` and end with `.suffix`"""
                          directory = Path(directory)

                          filename_pattern = re.compile(
                          re.escape(prefix) + "(?P<number>[0-9]+)." + re.escape(suffix), re.I
                          )
                          for file in directory.glob(f"{prefix}*.{suffix}"):
                          match = filename_pattern.search(file.name)
                          if not match:
                          continue
                          yield int(match.group("number")), file


                          This generator yield the number (as int) and the filename (as Path) of all the files that comply with the prefix and suffix



                          def rename_files(files, prefix, suffix, start_num=0):
                          """Generates the new filename"""
                          files = sorted(files)
                          last_file_num = files[-1][0]
                          max_num = max(last_file_num, start_num + len(files))
                          num_length = len(str(max_num))
                          for new_number, (_, file) in enumerate((files), start_num):
                          new_name = f"{prefix}{str(new_number).zfill(num_length)}.{suffix}"
                          yield file, new_name


                          This one yields the file and the new proposed name. To calculate how long the number needs to be, it takes into account the lingest existing number and the starting number + amount of files to rename.



                          It also keeps the existing order, ordered via the number, not lexigraphically, so spam1.txt comes before spam02.txt



                          the method to move the files takes this generator as input, and does the moving on the fly:



                          def move(renames, practice=True):
                          for file, new_name in renames:
                          new_file = file.with_name(new_name)
                          print(f"renaming {file.name} to {new_name}")
                          if not practice:
                          os.rename(file, new_file)


                          This was tested with



                          if __name__ == "__main__":
                          prefix = "spam"
                          suffix = "txt"
                          files = find_files(DATA_DIR, prefix, suffix)
                          renames = rename_files(files, prefix, suffix)
                          move(renames, practice=False)



                          renaming spam00.txt to spam0000.txt
                          renaming spam002.txt to spam0001.txt
                          renaming spam004.txt to spam0002.txt
                          renaming spam005.txt to spam0003.txt
                          renaming spam7.txt to spam0004.txt
                          renaming spam009.txt to spam0005.txt
                          renaming spam037.txt to spam0006.txt
                          renaming spam045.txt to spam0007.txt
                          renaming spam089.txt to spam0008.txt
                          renaming spam2000.txt to spam0009.txt



                          I tested this with:



                          DATA_DIR = Path("data/test")
                          testfiles = """eggs010.txt
                          spam00.txt
                          spam002.txt
                          spam004.txt
                          spam005.txt
                          spam5.dat
                          spam7.txt
                          spam009.txt
                          spam037.txt
                          spam045.txt
                          spam089.txt
                          spam2000.txt"""

                          for filename in testfiles.split("n"):
                          file = DATA_DIR / filename.strip()
                          file.touch()





                          share|improve this answer












                          Indenting



                          I don't like this style



                          def get_digits_minimum_length(filenames: List[str], file_prefix: str,
                          file_type: str, start_number: int) -> int:


                          I'm more a fan of



                          def get_digits_minimum_length(
                          filenames: List[str], file_prefix: str, file_type: str, start_number: int
                          ) -> int:


                          ...



                          or



                          def get_digits_minimum_length(
                          filenames: List[str],
                          file_prefix: str,
                          file_type: str,
                          start_number: int,
                          ) -> int:
                          ...


                          But since recently, I use a code formatter (black) to do this



                          Pathlib.Path



                          Using pathlib.Path instead of os can make things a lot easier. It has builtin globbing, and you can easily generate the new name



                          separate the program



                          All in all this is a rather simple program:




                          1. Find the files who are structured "{prefix}{number}.{suffix}"

                          2. Extract number

                          3. Generate a new number

                          4. Generate the new name

                          5. Move the files


                          Your program makes a few strange splits between the functions.



                          If you need to do this via a temporary directory, use tempfile.TemporaryDirectory as a context manager



                          All in all this can be done quite simple



                          def find_files(directory, prefix, suffix):
                          """find the files in `directory` wich start with `prefix` and end with `.suffix`"""
                          directory = Path(directory)

                          filename_pattern = re.compile(
                          re.escape(prefix) + "(?P<number>[0-9]+)." + re.escape(suffix), re.I
                          )
                          for file in directory.glob(f"{prefix}*.{suffix}"):
                          match = filename_pattern.search(file.name)
                          if not match:
                          continue
                          yield int(match.group("number")), file


                          This generator yield the number (as int) and the filename (as Path) of all the files that comply with the prefix and suffix



                          def rename_files(files, prefix, suffix, start_num=0):
                          """Generates the new filename"""
                          files = sorted(files)
                          last_file_num = files[-1][0]
                          max_num = max(last_file_num, start_num + len(files))
                          num_length = len(str(max_num))
                          for new_number, (_, file) in enumerate((files), start_num):
                          new_name = f"{prefix}{str(new_number).zfill(num_length)}.{suffix}"
                          yield file, new_name


                          This one yields the file and the new proposed name. To calculate how long the number needs to be, it takes into account the lingest existing number and the starting number + amount of files to rename.



                          It also keeps the existing order, ordered via the number, not lexigraphically, so spam1.txt comes before spam02.txt



                          the method to move the files takes this generator as input, and does the moving on the fly:



                          def move(renames, practice=True):
                          for file, new_name in renames:
                          new_file = file.with_name(new_name)
                          print(f"renaming {file.name} to {new_name}")
                          if not practice:
                          os.rename(file, new_file)


                          This was tested with



                          if __name__ == "__main__":
                          prefix = "spam"
                          suffix = "txt"
                          files = find_files(DATA_DIR, prefix, suffix)
                          renames = rename_files(files, prefix, suffix)
                          move(renames, practice=False)



                          renaming spam00.txt to spam0000.txt
                          renaming spam002.txt to spam0001.txt
                          renaming spam004.txt to spam0002.txt
                          renaming spam005.txt to spam0003.txt
                          renaming spam7.txt to spam0004.txt
                          renaming spam009.txt to spam0005.txt
                          renaming spam037.txt to spam0006.txt
                          renaming spam045.txt to spam0007.txt
                          renaming spam089.txt to spam0008.txt
                          renaming spam2000.txt to spam0009.txt



                          I tested this with:



                          DATA_DIR = Path("data/test")
                          testfiles = """eggs010.txt
                          spam00.txt
                          spam002.txt
                          spam004.txt
                          spam005.txt
                          spam5.dat
                          spam7.txt
                          spam009.txt
                          spam037.txt
                          spam045.txt
                          spam089.txt
                          spam2000.txt"""

                          for filename in testfiles.split("n"):
                          file = DATA_DIR / filename.strip()
                          file.touch()






                          share|improve this answer












                          share|improve this answer



                          share|improve this answer










                          answered 12 hours ago









                          Maarten Fabré

                          4,384417




                          4,384417






























                              draft saved

                              draft discarded




















































                              Thanks for contributing an answer to Code Review Stack Exchange!


                              • Please be sure to answer the question. Provide details and share your research!

                              But avoid



                              • Asking for help, clarification, or responding to other answers.

                              • Making statements based on opinion; back them up with references or personal experience.


                              Use MathJax to format equations. MathJax reference.


                              To learn more, see our tips on writing great answers.





                              Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


                              Please pay close attention to the following guidance:


                              • Please be sure to answer the question. Provide details and share your research!

                              But avoid



                              • Asking for help, clarification, or responding to other answers.

                              • Making statements based on opinion; back them up with references or personal experience.


                              To learn more, see our tips on writing great answers.




                              draft saved


                              draft discarded














                              StackExchange.ready(
                              function () {
                              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209173%2frenumbering-files-in-a-folder-consecutively%23new-answer', 'question_page');
                              }
                              );

                              Post as a guest















                              Required, but never shown





















































                              Required, but never shown














                              Required, but never shown












                              Required, but never shown







                              Required, but never shown

































                              Required, but never shown














                              Required, but never shown












                              Required, but never shown







                              Required, but never shown







                              Popular posts from this blog

                              Morgemoulin

                              Scott Moir

                              Souastre