diff mbox series

[v1,1/5] ocfs2: partly revert da5e7c87827e8 for mounting crash issue

Message ID 20220408103012.1419-2-heming.zhao@suse.com (mailing list archive)
State New, archived
Headers show
Series rewrite error handling during mounting stage | expand

Commit Message

Heming Zhao April 8, 2022, 10:30 a.m. UTC
After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"),
journal init later than before, it makes NULL pointer access in free
routine.

Crash flow:

ocfs2_fill_super
 + ocfs2_mount_volume
 |  + ocfs2_dlm_init //fail & return, osb->journal is NULL.
 |  + ...
 |  + ocfs2_check_volume //no chance to init osb->journal
 |
 + ...
 + ocfs2_dismount_volume
    ocfs2_release_system_inodes
      ...
       evict
        ...
         ocfs2_clear_inode
          ocfs2_checkpoint_inode
           ocfs2_ci_fully_checkpointed
            time_after(journal->j_trans_id, ci->ci_last_trans)
             + journal is empty, crash!

For fixing, there are three solutions:

1> Revert commit da5e7c87827e8

For avoiding kernel crash, this make sense for us. We only concerned
whether there has any non-system inode access before dlm init. The
answer is NO. And all journal replay/recovery handling happen after
dlm & journal init done. So this method is not graceful but workable.

2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)

The fix code is special for mounting phase, but it will continue
working after mounting stage. In another word, this method adds useless
code in normal inode free flow.

3> Do directly free inode in mounting phase

This method is brutal/complex and may introduce unsafe code, currently
maintainer didn't like.

At last, we chose method <1> and partly reverted commit da5e7c87827e8.
We reverted journal init code, and kept cleanup code flow.

Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
 fs/ocfs2/inode.c   |  4 ++--
 fs/ocfs2/journal.c | 21 +--------------------
 fs/ocfs2/super.c   | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 22 deletions(-)

Comments

Joseph Qi April 9, 2022, 1:11 p.m. UTC | #1
Suggest rename the subject, something like
ocfs2: fix mount crash if journal is not alloced

On 4/8/22 6:30 PM, Heming Zhao wrote:
> After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"),
> journal init later than before, it makes NULL pointer access in free
> routine.
> 
> Crash flow:
> 
> ocfs2_fill_super
>  + ocfs2_mount_volume
>  |  + ocfs2_dlm_init //fail & return, osb->journal is NULL.
>  |  + ...
>  |  + ocfs2_check_volume //no chance to init osb->journal
>  |
>  + ...
>  + ocfs2_dismount_volume
>     ocfs2_release_system_inodes
>       ...
>        evict
>         ...
>          ocfs2_clear_inode
>           ocfs2_checkpoint_inode
>            ocfs2_ci_fully_checkpointed
>             time_after(journal->j_trans_id, ci->ci_last_trans)
>              + journal is empty, crash!
> 
> For fixing, there are three solutions:
> 
> 1> Revert commit da5e7c87827e8
> 
> For avoiding kernel crash, this make sense for us. We only concerned
> whether there has any non-system inode access before dlm init. The
> answer is NO. And all journal replay/recovery handling happen after
> dlm & journal init done. So this method is not graceful but workable.
> 
> 2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)
> 
> The fix code is special for mounting phase, but it will continue
> working after mounting stage. In another word, this method adds useless
> code in normal inode free flow.
> 
> 3> Do directly free inode in mounting phase
> 
> This method is brutal/complex and may introduce unsafe code, currently
> maintainer didn't like.
> 
> At last, we chose method <1> and partly reverted commit da5e7c87827e8.
> We reverted journal init code, and kept cleanup code flow.
> 
> Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
>  fs/ocfs2/inode.c   |  4 ++--
>  fs/ocfs2/journal.c | 21 +--------------------
>  fs/ocfs2/super.c   | 33 +++++++++++++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
> index 5739dc301569..bb116c39b581 100644
> --- a/fs/ocfs2/inode.c
> +++ b/fs/ocfs2/inode.c
> @@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>  	struct inode *inode = NULL;
>  	struct super_block *sb = osb->sb;
>  	struct ocfs2_find_inode_args args;
> +	journal_t *journal = osb->journal->j_journal;
>  
>  	trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
>  			       sysfile_type);
> @@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>  	 * part of the transaction - the inode could have been reclaimed and
>  	 * now it is reread from disk.
>  	 */
> -	if (osb->journal) {
> +	if (journal) {
>  		transaction_t *transaction;
>  		tid_t tid;
>  		struct ocfs2_inode_info *oi = OCFS2_I(inode);
> -		journal_t *journal = osb->journal->j_journal;
>  
>  		read_lock(&journal->j_state_lock);
>  		if (journal->j_running_transaction)
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index 1887a2708709..afb85de3bb60 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -815,30 +815,11 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
>  	int status = -1;
>  	struct inode *inode = NULL; /* the journal inode */
>  	journal_t *j_journal = NULL;
> -	struct ocfs2_journal *journal = NULL;
> +	struct ocfs2_journal *journal = osb->journal;
>  	struct ocfs2_dinode *di = NULL;
>  	struct buffer_head *bh = NULL;
>  	int inode_lock = 0;
>  
> -	/* initialize our journal structure */
> -	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
> -	if (!journal) {
> -		mlog(ML_ERROR, "unable to alloc journal\n");
> -		status = -ENOMEM;
> -		goto done;
> -	}
> -	osb->journal = journal;
> -	journal->j_osb = osb;
> -
> -	atomic_set(&journal->j_num_trans, 0);
> -	init_rwsem(&journal->j_trans_barrier);
> -	init_waitqueue_head(&journal->j_checkpointed);
> -	spin_lock_init(&journal->j_lock);
> -	journal->j_trans_id = 1UL;
> -	INIT_LIST_HEAD(&journal->j_la_cleanups);
> -	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
> -	journal->j_state = OCFS2_JOURNAL_FREE;
> -
>  	/* already have the inode for our journal */
>  	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
>  					    osb->slot_num);
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index 477cdf94122e..5f63a2333e52 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>  	int i, cbits, bbits;
>  	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
>  	struct inode *inode = NULL;
> +	struct ocfs2_journal *journal;
>  	struct ocfs2_super *osb;
>  	u64 total_blocks;
>  
> @@ -2195,6 +2196,32 @@ static int ocfs2_initialize_super(struct super_block *sb,
>  
>  	get_random_bytes(&osb->s_next_generation, sizeof(u32));
>  
> +	/* FIXME
> +	 * This should be done in ocfs2_journal_init(), but unknown
> +	 * ordering issues will cause the filesystem to crash.
> +	 * If anyone wants to figure out what part of the code
> +	 * refers to osb->journal before ocfs2_journal_init() is run,
> +	 * be my guest.
> +	 */
> +	/* initialize our journal structure */
> +	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
> +	if (!journal) {
> +		mlog(ML_ERROR, "unable to alloc journal\n");
> +		status = -ENOMEM;
> +		goto bail;
> +	}
> +	osb->journal = journal;
> +	journal->j_osb = osb;
> +
> +	atomic_set(&journal->j_num_trans, 0);
> +	init_rwsem(&journal->j_trans_barrier);
> +	init_waitqueue_head(&journal->j_checkpointed);
> +	spin_lock_init(&journal->j_lock);
> +	journal->j_trans_id = 1UL;
> +	INIT_LIST_HEAD(&journal->j_la_cleanups);
> +	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
> +	journal->j_state = OCFS2_JOURNAL_FREE;
> +
We may fold these into a new function, such as ocfs2_journal_alloc().

Thanks,
Joseph

>  	INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
>  	init_llist_head(&osb->dquot_drop_list);
>  
> @@ -2483,6 +2510,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
>  
>  	kfree(osb->osb_orphan_wipes);
>  	kfree(osb->slot_recovery_generations);
> +	/* FIXME
> +	 * This belongs in journal shutdown, but because we have to
> +	 * allocate osb->journal at the middle of ocfs2_initialize_super(),
> +	 * we free it here.
> +	 */
> +	kfree(osb->journal);
>  	kfree(osb->local_alloc_copy);
>  	kfree(osb->uuid_str);
>  	kfree(osb->vol_label);
David Sterba via Ocfs2-devel April 9, 2022, 4:28 p.m. UTC | #2
On 4/9/22 21:11, Joseph Qi wrote:
> Suggest rename the subject, something like
> ocfs2: fix mount crash if journal is not alloced

OK.

> 
> On 4/8/22 6:30 PM, Heming Zhao wrote:
>> After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"),
>> journal init later than before, it makes NULL pointer access in free
>> routine.
>>
>> Crash flow:
>>
>> ocfs2_fill_super
>>   + ocfs2_mount_volume
>>   |  + ocfs2_dlm_init //fail & return, osb->journal is NULL.
>>   |  + ...
>>   |  + ocfs2_check_volume //no chance to init osb->journal
>>   |
>>   + ...
>>   + ocfs2_dismount_volume
>>      ocfs2_release_system_inodes
>>        ...
>>         evict
>>          ...
>>           ocfs2_clear_inode
>>            ocfs2_checkpoint_inode
>>             ocfs2_ci_fully_checkpointed
>>              time_after(journal->j_trans_id, ci->ci_last_trans)
>>               + journal is empty, crash!
>>
>> For fixing, there are three solutions:
>>
>> 1> Revert commit da5e7c87827e8
>>
>> For avoiding kernel crash, this make sense for us. We only concerned
>> whether there has any non-system inode access before dlm init. The
>> answer is NO. And all journal replay/recovery handling happen after
>> dlm & journal init done. So this method is not graceful but workable.
>>
>> 2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)
>>
>> The fix code is special for mounting phase, but it will continue
>> working after mounting stage. In another word, this method adds useless
>> code in normal inode free flow.
>>
>> 3> Do directly free inode in mounting phase
>>
>> This method is brutal/complex and may introduce unsafe code, currently
>> maintainer didn't like.
>>
>> At last, we chose method <1> and partly reverted commit da5e7c87827e8.
>> We reverted journal init code, and kept cleanup code flow.
>>
>> Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>> ---
>>   fs/ocfs2/inode.c   |  4 ++--
>>   fs/ocfs2/journal.c | 21 +--------------------
>>   fs/ocfs2/super.c   | 33 +++++++++++++++++++++++++++++++++
>>   3 files changed, 36 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>> index 5739dc301569..bb116c39b581 100644
>> --- a/fs/ocfs2/inode.c
>> +++ b/fs/ocfs2/inode.c
>> @@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>>   	struct inode *inode = NULL;
>>   	struct super_block *sb = osb->sb;
>>   	struct ocfs2_find_inode_args args;
>> +	journal_t *journal = osb->journal->j_journal;
>>   
>>   	trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
>>   			       sysfile_type);
>> @@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>>   	 * part of the transaction - the inode could have been reclaimed and
>>   	 * now it is reread from disk.
>>   	 */
>> -	if (osb->journal) {
>> +	if (journal) {
>>   		transaction_t *transaction;
>>   		tid_t tid;
>>   		struct ocfs2_inode_info *oi = OCFS2_I(inode);
>> -		journal_t *journal = osb->journal->j_journal;
>>   
>>   		read_lock(&journal->j_state_lock);
>>   		if (journal->j_running_transaction)
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index 1887a2708709..afb85de3bb60 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -815,30 +815,11 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
>>   	int status = -1;
>>   	struct inode *inode = NULL; /* the journal inode */
>>   	journal_t *j_journal = NULL;
>> -	struct ocfs2_journal *journal = NULL;
>> +	struct ocfs2_journal *journal = osb->journal;
>>   	struct ocfs2_dinode *di = NULL;
>>   	struct buffer_head *bh = NULL;
>>   	int inode_lock = 0;
>>   
>> -	/* initialize our journal structure */
>> -	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
>> -	if (!journal) {
>> -		mlog(ML_ERROR, "unable to alloc journal\n");
>> -		status = -ENOMEM;
>> -		goto done;
>> -	}
>> -	osb->journal = journal;
>> -	journal->j_osb = osb;
>> -
>> -	atomic_set(&journal->j_num_trans, 0);
>> -	init_rwsem(&journal->j_trans_barrier);
>> -	init_waitqueue_head(&journal->j_checkpointed);
>> -	spin_lock_init(&journal->j_lock);
>> -	journal->j_trans_id = 1UL;
>> -	INIT_LIST_HEAD(&journal->j_la_cleanups);
>> -	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
>> -	journal->j_state = OCFS2_JOURNAL_FREE;
>> -
>>   	/* already have the inode for our journal */
>>   	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
>>   					    osb->slot_num);
>> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
>> index 477cdf94122e..5f63a2333e52 100644
>> --- a/fs/ocfs2/super.c
>> +++ b/fs/ocfs2/super.c
>> @@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	int i, cbits, bbits;
>>   	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
>>   	struct inode *inode = NULL;
>> +	struct ocfs2_journal *journal;
>>   	struct ocfs2_super *osb;
>>   	u64 total_blocks;
>>   
>> @@ -2195,6 +2196,32 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   
>>   	get_random_bytes(&osb->s_next_generation, sizeof(u32));
>>   
>> +	/* FIXME
>> +	 * This should be done in ocfs2_journal_init(), but unknown
>> +	 * ordering issues will cause the filesystem to crash.
>> +	 * If anyone wants to figure out what part of the code
>> +	 * refers to osb->journal before ocfs2_journal_init() is run,
>> +	 * be my guest.
>> +	 */
>> +	/* initialize our journal structure */
>> +	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
>> +	if (!journal) {
>> +		mlog(ML_ERROR, "unable to alloc journal\n");
>> +		status = -ENOMEM;
>> +		goto bail;
>> +	}
>> +	osb->journal = journal;
>> +	journal->j_osb = osb;
>> +
>> +	atomic_set(&journal->j_num_trans, 0);
>> +	init_rwsem(&journal->j_trans_barrier);
>> +	init_waitqueue_head(&journal->j_checkpointed);
>> +	spin_lock_init(&journal->j_lock);
>> +	journal->j_trans_id = 1UL;
>> +	INIT_LIST_HEAD(&journal->j_la_cleanups);
>> +	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
>> +	journal->j_state = OCFS2_JOURNAL_FREE;
>> +
> We may fold these into a new function, such as ocfs2_journal_alloc().
> 

OK, will create a new function in journal.c
For this area FIXME comment, the legacy comment humor & vague. In my view, we
already got the order issue clearly, why not we change it as below?
     /*
      * FIXME
      * This should be done in ocfs2_journal_init(), but any inode
      * writes back operation will cause the filesystem to crash.
      */

Thanks,
Heming
> 
>>   	INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
>>   	init_llist_head(&osb->dquot_drop_list);
>>   
>> @@ -2483,6 +2510,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
>>   
>>   	kfree(osb->osb_orphan_wipes);
>>   	kfree(osb->slot_recovery_generations);
>> +	/* FIXME
>> +	 * This belongs in journal shutdown, but because we have to
>> +	 * allocate osb->journal at the middle of ocfs2_initialize_super(),
>> +	 * we free it here.
>> +	 */
>> +	kfree(osb->journal);
>>   	kfree(osb->local_alloc_copy);
>>   	kfree(osb->uuid_str);
>>   	kfree(osb->vol_label);
>
Joseph Qi April 10, 2022, noon UTC | #3
On 4/10/22 12:28 AM, heming.zhao@suse.com wrote:
> On 4/9/22 21:11, Joseph Qi wrote:
>> Suggest rename the subject, something like
>> ocfs2: fix mount crash if journal is not alloced
> 
> OK.
> 
>>
>> On 4/8/22 6:30 PM, Heming Zhao wrote:
>>> After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"),
>>> journal init later than before, it makes NULL pointer access in free
>>> routine.
>>>
>>> Crash flow:
>>>
>>> ocfs2_fill_super
>>>   + ocfs2_mount_volume
>>>   |  + ocfs2_dlm_init //fail & return, osb->journal is NULL.
>>>   |  + ...
>>>   |  + ocfs2_check_volume //no chance to init osb->journal
>>>   |
>>>   + ...
>>>   + ocfs2_dismount_volume
>>>      ocfs2_release_system_inodes
>>>        ...
>>>         evict
>>>          ...
>>>           ocfs2_clear_inode
>>>            ocfs2_checkpoint_inode
>>>             ocfs2_ci_fully_checkpointed
>>>              time_after(journal->j_trans_id, ci->ci_last_trans)
>>>               + journal is empty, crash!
>>>
>>> For fixing, there are three solutions:
>>>
>>> 1> Revert commit da5e7c87827e8
>>>
>>> For avoiding kernel crash, this make sense for us. We only concerned
>>> whether there has any non-system inode access before dlm init. The
>>> answer is NO. And all journal replay/recovery handling happen after
>>> dlm & journal init done. So this method is not graceful but workable.
>>>
>>> 2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)
>>>
>>> The fix code is special for mounting phase, but it will continue
>>> working after mounting stage. In another word, this method adds useless
>>> code in normal inode free flow.
>>>
>>> 3> Do directly free inode in mounting phase
>>>
>>> This method is brutal/complex and may introduce unsafe code, currently
>>> maintainer didn't like.
>>>
>>> At last, we chose method <1> and partly reverted commit da5e7c87827e8.
>>> We reverted journal init code, and kept cleanup code flow.
>>>
>>> Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
>>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>>> ---
>>>   fs/ocfs2/inode.c   |  4 ++--
>>>   fs/ocfs2/journal.c | 21 +--------------------
>>>   fs/ocfs2/super.c   | 33 +++++++++++++++++++++++++++++++++
>>>   3 files changed, 36 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>> index 5739dc301569..bb116c39b581 100644
>>> --- a/fs/ocfs2/inode.c
>>> +++ b/fs/ocfs2/inode.c
>>> @@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>>>       struct inode *inode = NULL;
>>>       struct super_block *sb = osb->sb;
>>>       struct ocfs2_find_inode_args args;
>>> +    journal_t *journal = osb->journal->j_journal;
>>>         trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
>>>                      sysfile_type);
>>> @@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
>>>        * part of the transaction - the inode could have been reclaimed and
>>>        * now it is reread from disk.
>>>        */
>>> -    if (osb->journal) {
>>> +    if (journal) {
>>>           transaction_t *transaction;
>>>           tid_t tid;
>>>           struct ocfs2_inode_info *oi = OCFS2_I(inode);
>>> -        journal_t *journal = osb->journal->j_journal;
>>>             read_lock(&journal->j_state_lock);
>>>           if (journal->j_running_transaction)
>>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>>> index 1887a2708709..afb85de3bb60 100644
>>> --- a/fs/ocfs2/journal.c
>>> +++ b/fs/ocfs2/journal.c
>>> @@ -815,30 +815,11 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
>>>       int status = -1;
>>>       struct inode *inode = NULL; /* the journal inode */
>>>       journal_t *j_journal = NULL;
>>> -    struct ocfs2_journal *journal = NULL;
>>> +    struct ocfs2_journal *journal = osb->journal;
>>>       struct ocfs2_dinode *di = NULL;
>>>       struct buffer_head *bh = NULL;
>>>       int inode_lock = 0;
>>>   -    /* initialize our journal structure */
>>> -    journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
>>> -    if (!journal) {
>>> -        mlog(ML_ERROR, "unable to alloc journal\n");
>>> -        status = -ENOMEM;
>>> -        goto done;
>>> -    }
>>> -    osb->journal = journal;
>>> -    journal->j_osb = osb;
>>> -
>>> -    atomic_set(&journal->j_num_trans, 0);
>>> -    init_rwsem(&journal->j_trans_barrier);
>>> -    init_waitqueue_head(&journal->j_checkpointed);
>>> -    spin_lock_init(&journal->j_lock);
>>> -    journal->j_trans_id = 1UL;
>>> -    INIT_LIST_HEAD(&journal->j_la_cleanups);
>>> -    INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
>>> -    journal->j_state = OCFS2_JOURNAL_FREE;
>>> -
>>>       /* already have the inode for our journal */
>>>       inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
>>>                           osb->slot_num);
>>> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
>>> index 477cdf94122e..5f63a2333e52 100644
>>> --- a/fs/ocfs2/super.c
>>> +++ b/fs/ocfs2/super.c
>>> @@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>>       int i, cbits, bbits;
>>>       struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
>>>       struct inode *inode = NULL;
>>> +    struct ocfs2_journal *journal;
>>>       struct ocfs2_super *osb;
>>>       u64 total_blocks;
>>>   @@ -2195,6 +2196,32 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>>         get_random_bytes(&osb->s_next_generation, sizeof(u32));
>>>   +    /* FIXME
>>> +     * This should be done in ocfs2_journal_init(), but unknown
>>> +     * ordering issues will cause the filesystem to crash.
>>> +     * If anyone wants to figure out what part of the code
>>> +     * refers to osb->journal before ocfs2_journal_init() is run,
>>> +     * be my guest.
>>> +     */
>>> +    /* initialize our journal structure */
>>> +    journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
>>> +    if (!journal) {
>>> +        mlog(ML_ERROR, "unable to alloc journal\n");
>>> +        status = -ENOMEM;
>>> +        goto bail;
>>> +    }
>>> +    osb->journal = journal;
>>> +    journal->j_osb = osb;
>>> +
>>> +    atomic_set(&journal->j_num_trans, 0);
>>> +    init_rwsem(&journal->j_trans_barrier);
>>> +    init_waitqueue_head(&journal->j_checkpointed);
>>> +    spin_lock_init(&journal->j_lock);
>>> +    journal->j_trans_id = 1UL;
>>> +    INIT_LIST_HEAD(&journal->j_la_cleanups);
>>> +    INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
>>> +    journal->j_state = OCFS2_JOURNAL_FREE;
>>> +
>> We may fold these into a new function, such as ocfs2_journal_alloc().
>>
> 
> OK, will create a new function in journal.c
> For this area FIXME comment, the legacy comment humor & vague. In my view, we
> already got the order issue clearly, why not we change it as below?
>     /*
>      * FIXME
>      * This should be done in ocfs2_journal_init(), but any inode
>      * writes back operation will cause the filesystem to crash.
>      */

Sounds good.

Thanks,
Joseph
diff mbox series

Patch

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 5739dc301569..bb116c39b581 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -125,6 +125,7 @@  struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 	struct inode *inode = NULL;
 	struct super_block *sb = osb->sb;
 	struct ocfs2_find_inode_args args;
+	journal_t *journal = osb->journal->j_journal;
 
 	trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
 			       sysfile_type);
@@ -171,11 +172,10 @@  struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 	 * part of the transaction - the inode could have been reclaimed and
 	 * now it is reread from disk.
 	 */
-	if (osb->journal) {
+	if (journal) {
 		transaction_t *transaction;
 		tid_t tid;
 		struct ocfs2_inode_info *oi = OCFS2_I(inode);
-		journal_t *journal = osb->journal->j_journal;
 
 		read_lock(&journal->j_state_lock);
 		if (journal->j_running_transaction)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1887a2708709..afb85de3bb60 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -815,30 +815,11 @@  int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
 	int status = -1;
 	struct inode *inode = NULL; /* the journal inode */
 	journal_t *j_journal = NULL;
-	struct ocfs2_journal *journal = NULL;
+	struct ocfs2_journal *journal = osb->journal;
 	struct ocfs2_dinode *di = NULL;
 	struct buffer_head *bh = NULL;
 	int inode_lock = 0;
 
-	/* initialize our journal structure */
-	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
-	if (!journal) {
-		mlog(ML_ERROR, "unable to alloc journal\n");
-		status = -ENOMEM;
-		goto done;
-	}
-	osb->journal = journal;
-	journal->j_osb = osb;
-
-	atomic_set(&journal->j_num_trans, 0);
-	init_rwsem(&journal->j_trans_barrier);
-	init_waitqueue_head(&journal->j_checkpointed);
-	spin_lock_init(&journal->j_lock);
-	journal->j_trans_id = 1UL;
-	INIT_LIST_HEAD(&journal->j_la_cleanups);
-	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
-	journal->j_state = OCFS2_JOURNAL_FREE;
-
 	/* already have the inode for our journal */
 	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
 					    osb->slot_num);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 477cdf94122e..5f63a2333e52 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2015,6 +2015,7 @@  static int ocfs2_initialize_super(struct super_block *sb,
 	int i, cbits, bbits;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 	struct inode *inode = NULL;
+	struct ocfs2_journal *journal;
 	struct ocfs2_super *osb;
 	u64 total_blocks;
 
@@ -2195,6 +2196,32 @@  static int ocfs2_initialize_super(struct super_block *sb,
 
 	get_random_bytes(&osb->s_next_generation, sizeof(u32));
 
+	/* FIXME
+	 * This should be done in ocfs2_journal_init(), but unknown
+	 * ordering issues will cause the filesystem to crash.
+	 * If anyone wants to figure out what part of the code
+	 * refers to osb->journal before ocfs2_journal_init() is run,
+	 * be my guest.
+	 */
+	/* initialize our journal structure */
+	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
+	if (!journal) {
+		mlog(ML_ERROR, "unable to alloc journal\n");
+		status = -ENOMEM;
+		goto bail;
+	}
+	osb->journal = journal;
+	journal->j_osb = osb;
+
+	atomic_set(&journal->j_num_trans, 0);
+	init_rwsem(&journal->j_trans_barrier);
+	init_waitqueue_head(&journal->j_checkpointed);
+	spin_lock_init(&journal->j_lock);
+	journal->j_trans_id = 1UL;
+	INIT_LIST_HEAD(&journal->j_la_cleanups);
+	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
+	journal->j_state = OCFS2_JOURNAL_FREE;
+
 	INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
 	init_llist_head(&osb->dquot_drop_list);
 
@@ -2483,6 +2510,12 @@  static void ocfs2_delete_osb(struct ocfs2_super *osb)
 
 	kfree(osb->osb_orphan_wipes);
 	kfree(osb->slot_recovery_generations);
+	/* FIXME
+	 * This belongs in journal shutdown, but because we have to
+	 * allocate osb->journal at the middle of ocfs2_initialize_super(),
+	 * we free it here.
+	 */
+	kfree(osb->journal);
 	kfree(osb->local_alloc_copy);
 	kfree(osb->uuid_str);
 	kfree(osb->vol_label);